lq_jieshu.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. # -*- coding: utf-8 -*-
  2. import datetime
  3. import re
  4. import scrapy
  5. from ..items import Lanjieguo
  6. class HgjieshuSpider(scrapy.Spider):
  7. name = 'lq_jieshu'
  8. to_day = datetime.datetime.now()
  9. allowed_domains = ['hg3535z.com']
  10. custom_settings = {
  11. "ITEM_PIPELINES":{
  12. 'hg3535.pipeline.Lanjieshuqiupipeline': 300,
  13. },
  14. 'LOG_LEVEL': 'DEBUG',
  15. 'LOG_FILE': "../hg3535/log/lq_jieshu_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  16. }
  17. start_urls = ['https://hg3535z.com/zh-cn/info-centre/sportsbook-info/results/2/normal/1']
  18. def parse(self, response):
  19. if response.status == 200:
  20. # 所有比赛对象
  21. # tema = response.xpath('//div[@class="rt-event"]//span[@class="pt"]/text()')
  22. # print(tema)
  23. # 所有比赛队名
  24. # tema_name = [i.extract() for i in tema]
  25. # 获得所有比分对象
  26. # tema_score = response.xpath('//div[contains(@class,"rt-qft")]')
  27. tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
  28. # print(tema_score)
  29. # 获得所有比赛id对象
  30. tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
  31. # str.replace()
  32. # 所有比赛id列表
  33. temaid_list = [i.extract().replace('e-', "") for i in tema_id]
  34. temascore_list = []
  35. for score in tema_score:
  36. # 正则匹配规则
  37. p1 = r"\d{1,3}-\d{1,3}"
  38. pattern1 = re.compile(p1)
  39. try:
  40. # 获取正则匹配结果
  41. c = pattern1.findall(score.extract())[0]
  42. temascore_list.append(c)
  43. except:
  44. c = ""
  45. temascore_list.append(c)
  46. # print(temaid_list)
  47. # print(temascore_list)
  48. # print(len(temaid_list))
  49. # print(len(temascore_list))
  50. # 赛事id,赛事比元组列表
  51. tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
  52. print(tema_tupe)
  53. # # print(len(tema_tupe))
  54. # for y in tema_tupe:
  55. # if y[1]:
  56. # item = Lanjieguo()
  57. # item['id_score'] = y
  58. # yield item
  59. id_list = []
  60. item = Lanjieguo()
  61. for y in tema_tupe:
  62. if y[1]:
  63. id_list.append(y[0])
  64. item['id_score'] = id_list
  65. yield item