hgjieshu.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # -*- coding: utf-8 -*-
  2. # import re
  3. import scrapy
  4. class HgjieshuSpider(scrapy.Spider):
  5. name = 'hgjieshu'
  6. allowed_domains = ['hg3535z.com']
  7. start_urls = ['https://hg3535z.com/zh-cn/info-centre/sportsbook-info/results/1/normal/1']
  8. def parse(self, response):
  9. # a = response.xpath('//*[@id="dt-cmp-66148"]/div[1]/div[3]/span[3]/text()')[0].extract_first()
  10. if response.status == 200:
  11. tema = response.xpath('//div[@class="rt-event"]//span[@class="pt"]/text()')
  12. # print(tema)
  13. aa = [i.extract() for i in tema ]
  14. # b = response.xpath('//div[@class="rt-ft "]/text()') #比分
  15. b = response.xpath('//div[contains(@class,"rt-ft")]')#比分')]/text()') #比分
  16. # print(b)
  17. # for i in tema:
  18. # print(i.extract())
  19. # c = i.xpath('//div/text()')
  20. # # yield c
  21. # i = i.extract()
  22. # print(i)
  23. # aa = [(x, y, z.extract().split('">')[1].split('</')[0].strip())for x in tema if x % 2 == 0 for y in tema if x % 2 == 1 for z in b]
  24. cc = [z.extract().split('">')[1].split('</')[0].strip() for z in b]
  25. print(cc)
  26. # dd = [(aa[x], aa[y], z) for x in range(len(aa)) if x % 2 == 0 for y in range(len(aa)) if y % 2 == 1 for z in cc]
  27. print(cc)
  28. print(len(cc))
  29. print(len(tema))
  30. print(len(aa))
  31. print(aa)
  32. # print(dd)
  33. # print(type(cc))
  34. # break
  35. # i = i
  36. # yield i
  37. # print(i.extract())
  38. # c = i.xpath()
  39. # print(i.xpath('//div/text()'))
  40. # print(type(i.extract()))
  41. # d = i.xpath('//div/text()')
  42. # print(c)
  43. # print(a)
  44. # print(len(a))
  45. # # print(b)
  46. # print(len(b))
  47. # for e in d:
  48. # print(e)
  49. pass