saiguo.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. import datetime
  2. import re
  3. import scrapy
  4. from ..items import Hgsaiguo
  5. class HgjieshuSpider(scrapy.Spider):
  6. name = 'saiguo'
  7. to_day = datetime.datetime.now()
  8. allowed_domains = ['hg3535z.com']
  9. custom_settings = {
  10. "ITEM_PIPELINES": {
  11. 'hg3535.pipeline.saiguo.Jieshuqiupipeline': 300,
  12. },
  13. # 'LOG_LEVEL': 'DEBUG',
  14. # 'LOG_FILE': "../hg3535/log/saiguo{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  15. }
  16. def start_requests(self):
  17. for y in range(1, 5):
  18. url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/1'.format(y)
  19. yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={'pt': y})
  20. def parse(self, response):
  21. if response.status == 200:
  22. pt = response.meta['pt']
  23. if pt == 1:
  24. tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
  25. # 获得所有比赛id对象
  26. tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
  27. # 所有比赛id列表
  28. temaid_list = [i.extract().replace('e-', "") for i in tema_id]
  29. temascore_list = []
  30. for score in tema_score:
  31. # 正则匹配规则
  32. p1 = r"\d{1,3}-\d{1,3}"
  33. pattern1 = re.compile(p1)
  34. try:
  35. # 获取正则匹配结果
  36. c = pattern1.findall(score.extract())[0]
  37. temascore_list.append(c)
  38. except:
  39. c = ""
  40. temascore_list.append(c)
  41. # 赛事id,赛事比元组列表
  42. tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
  43. for y in tema_tupe:
  44. if y[1]:
  45. item = Hgsaiguo()
  46. item['id_score'] = y
  47. item['pt'] = pt
  48. yield item
  49. if pt == 2:
  50. tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
  51. # 获得所有比赛id对象
  52. tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
  53. # 所有比赛id列表
  54. temaid_list = [i.extract().replace('e-', "") for i in tema_id]
  55. temascore_list = []
  56. for score in tema_score:
  57. # 正则匹配规则
  58. p1 = r"\d{1,3}-\d{1,3}"
  59. pattern1 = re.compile(p1)
  60. try:
  61. # 获取正则匹配结果
  62. c = pattern1.findall(score.extract())[0]
  63. temascore_list.append(c)
  64. except:
  65. c = ""
  66. temascore_list.append(c)
  67. # 赛事id,赛事比元组列表
  68. tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
  69. for y in tema_tupe:
  70. if y[1]:
  71. item = Hgsaiguo()
  72. item['id_score'] = y
  73. item['pt'] = pt
  74. yield item
  75. if pt == 3:
  76. # 获得所有比赛获胜人,判断赛事是否结束
  77. # tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/text()')
  78. tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/@title')
  79. # 获得所有比赛id对象
  80. tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
  81. # 所有比赛id列表
  82. temaid_list = [i.extract().replace('e-', "") for i in tema_id]
  83. temascore_list = []
  84. for score in tema_score:
  85. # 正则匹配规则
  86. # p1 = r"\d{1,3}-\d{1,3}"
  87. # pattern1 = re.compile(p1)
  88. try:
  89. # 获取正则匹配结果
  90. c = score.extract()
  91. temascore_list.append(c)
  92. except:
  93. c = ""
  94. temascore_list.append(c)
  95. # 赛事id,赛事比元组列表
  96. tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
  97. for y in tema_tupe:
  98. if y[1]:
  99. item = Hgsaiguo()
  100. item['id_score'] = y
  101. item['pt'] = pt
  102. yield item
  103. if pt == 4:
  104. tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
  105. # 获得所有比赛id对象
  106. tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
  107. # str.replace()
  108. # 所有比赛id列表
  109. temaid_list = [i.extract().replace('e-', "") for i in tema_id]
  110. temascore_list = []
  111. for score in tema_score:
  112. # 正则匹配规则
  113. p1 = r"\d{1,3}-\d{1,3}"
  114. pattern1 = re.compile(p1)
  115. try:
  116. # 获取正则匹配结果
  117. c = pattern1.findall(score.extract())[0]
  118. temascore_list.append(c)
  119. except:
  120. c = ""
  121. temascore_list.append(c)
  122. # 赛事id,赛事比元组列表
  123. tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
  124. for y in tema_tupe:
  125. if y[1]:
  126. item = Hgsaiguo()
  127. item['id_score'] = y
  128. item['pt'] = pt
  129. yield item