zq_jieshu.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. # -*- coding: utf-8 -*-
  2. # import re
  3. import datetime
  4. import re
  5. import scrapy
  6. from ..items import Zujieguo
  7. class HgjieshuSpider(scrapy.Spider):
  8. name = 'zq_jieshu'
  9. to_day = datetime.datetime.now()
  10. allowed_domains = ['hg3535z.com']
  11. custom_settings = {
  12. "ITEM_PIPELINES":{
  13. 'hg3535.pipelines.Zujieshuqiupipeline': 300,
  14. },
  15. # 'LOG_LEVEL': 'DEBUG',
  16. # 'LOG_FILE': "../hg3535/log/zq_jieshu_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  17. }
  18. start_urls = ['https://hg3535z.com/zh-cn/info-centre/sportsbook-info/results/1/normal/1']
  19. def parse(self, response):
  20. if response.status == 200:
  21. # 所有比赛对象
  22. # tema = response.xpath('//div[@class="rt-event"]//span[@class="pt"]/text()')
  23. # print(tema)
  24. # 所有比赛队名
  25. # tema_name = [i.extract() for i in tema]
  26. # 获得所有比分对象
  27. # tema_score = response.xpath('//div[contains(@class,"rt-ft ")]')
  28. tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
  29. # print(tema_score)
  30. # 获得所有比赛id对象
  31. tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
  32. # str.replace()
  33. # 所有比赛id列表
  34. temaid_list = [i.extract().replace('e-', "") for i in tema_id]
  35. temascore_list = []
  36. for score in tema_score:
  37. # 正则匹配规则
  38. p1 = r"\d{1,3}-\d{1,3}"
  39. pattern1 = re.compile(p1)
  40. try:
  41. # 获取正则匹配结果
  42. c = pattern1.findall(score.extract())[0]
  43. temascore_list.append(c)
  44. except:
  45. c = ""
  46. temascore_list.append(c)
  47. # print(temaid_list)
  48. # print(temascore_list)
  49. # print(len(temaid_list))
  50. # print(len(temascore_list))
  51. # 赛事id,赛事比元组列表
  52. tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
  53. print(tema_tupe)
  54. # print(tema_tupe)
  55. # print(len(tema_tupe))
  56. for y in tema_tupe:
  57. if y[1]:
  58. item = Zujieguo()
  59. item['id_score'] = y
  60. yield item