saiguo.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. import datetime
  2. import re
  3. import scrapy
  4. from ..items import Hgsaiguo
  class HgjieshuSpider(scrapy.Spider):
      """Spider that collects finished-match results from the results pages.

      One request is issued per ``pt`` value (1 through 4); the value is part
      of the results URL and is carried to the callback through
      ``response.meta``.  For every result row found on a page an ``Hgsaiguo``
      item is yielded with:

      * ``id_score`` -- a ``(match_id, result)`` tuple, where ``match_id`` is
        the row's ``id`` attribute with ``'e-'`` removed;
      * ``pt`` -- the ``pt`` value of the page the row came from.

      NOTE(review): what each ``pt`` value stands for (presumably a sport or
      page category) is not visible in this file -- confirm with the site.
      """

      name = 'saiguo'
      # Only referenced by the commented-out LOG_FILE setting below; kept as a
      # class attribute in case other code reads it.
      to_day = datetime.datetime.now()
      # NOTE(review): the requests built in start_requests() target
      # www.hg3535.cn, which this list does not cover; they are sent with
      # dont_filter=True.  Confirm whether this domain list is still intended.
      allowed_domains = ['hg3535z.com']
      custom_settings = {
          "ITEM_PIPELINES": {
              'hg3535.pipeline.saiguo.Jieshuqiupipeline': 300,
          },
          # 'LOG_LEVEL': 'DEBUG',
          # 'LOG_FILE': "../hg3535/log/zq_jieshu_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
      }

      # Score as it appears in the result cell, e.g. "2-1".  Compiled once
      # instead of on every loop iteration.
      _SCORE_RE = re.compile(r"\d{1,3}-\d{1,3}")
      # Parent element of every ``div.flex-wrap``: one node per result row.
      _ROW_XPATH = '//div[@class="flex-wrap"]/..'
      # ``pt`` values that parse() knows how to handle.
      _KNOWN_PT = (1, 2, 3, 4)

      def start_requests(self):
          """Yield one request per results page (``pt`` = 1..4)."""
          for pt in range(1, 5):
              url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/1'.format(pt)
              yield scrapy.Request(
                  url=url,
                  callback=self.parse,
                  dont_filter=True,
                  meta={'pt': pt},
              )

      def parse(self, response):
          """Yield one ``Hgsaiguo`` item per distinct (match id, result) pair.

          The id and the result are read from the *same* row element, so a
          row that lacks one of them can no longer shift the pairing of the
          rows that follow it (the earlier implementation paired two
          independently collected lists by index).  Rows without an ``id``
          attribute or without a non-empty result are skipped.  Duplicate
          pairs are emitted once, in document order.

          Args:
              response: Scrapy response whose ``meta['pt']`` holds the ``pt``
                  value the request was created with.

          Yields:
              Hgsaiguo: item with ``id_score`` and ``pt`` set.
          """
          if response.status != 200:
              return
          pt = response.meta['pt']
          if pt not in self._KNOWN_PT:
              return

          seen = set()
          for row in response.xpath(self._ROW_XPATH):
              raw_id = row.xpath('./div[1]/@id').extract_first()
              if raw_id is None:
                  continue
              result = self._extract_result(row, pt)
              if not result:
                  # No score / winner in this row: nothing to report for it.
                  continue
              id_score = (raw_id.replace('e-', ""), result)
              if id_score in seen:
                  continue
              seen.add(id_score)

              item = Hgsaiguo()
              item['id_score'] = id_score
              item['pt'] = pt
              yield item

      def _extract_result(self, row, pt):
          """Return the result text of one row, or ``""`` when there is none.

          For ``pt == 3`` the result is the text of the row's fourth ``div``
          (the match winner, used to tell whether the match has finished).
          For every other ``pt`` it is the first ``N-N`` score found in the
          markup of the row's fifth ``div``.

          NOTE(review): for ``pt == 3`` only the first text node of the cell
          is used, unstripped -- confirm the cell holds a single text node.
          """
          if pt == 3:
              return row.xpath('./div[4]/text()').extract_first() or ""
          cell_html = row.xpath('./div[5]').extract_first()
          if cell_html is None:
              return ""
          found = self._SCORE_RE.search(cell_html)
          return found.group(0) if found else ""