| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- # -*- coding: utf-8 -*-
- # import re
- import scrapy
class HgjieshuSpider(scrapy.Spider):
    """Spider that fetches one sportsbook results page and prints what it finds.

    The spider currently only prints the extracted team names and score
    strings for inspection; it does not yield items or follow links.
    """

    name = 'hgjieshu'
    allowed_domains = ['hg3535z.com']
    start_urls = ['https://hg3535z.com/zh-cn/info-centre/sportsbook-info/results/1/normal/1']

    def parse(self, response):
        """Print the team names and score strings found in ``response``.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Returns:
            None. Responses whose status is not 200 are ignored.
        """
        if response.status != 200:
            return

        # Team names: the text of every <span class="pt"> under a
        # <div class="rt-event">.
        team_nodes = response.xpath('//div[@class="rt-event"]//span[@class="pt"]/text()')
        team_names = team_nodes.extract()

        # Scores: every <div> whose class attribute contains "rt-ft".
        # contains() is used rather than an exact match so that a class value
        # with extra whitespace or additional classes still matches.
        score_nodes = response.xpath('//div[contains(@class,"rt-ft")]')

        # Read the first direct text node of each score element through the
        # selector instead of slicing the serialised HTML on '">' and '</':
        # the slicing raised IndexError when the markup had no '">' and
        # returned tag text when the element's first child was another tag.
        # An element without direct text yields an empty string.
        scores = [
            node.xpath('text()').extract_first(default='').strip()
            for node in score_nodes
        ]

        # Debug output, kept on stdout as before.
        # NOTE(review): the commented-out experiments suggest team names were
        # meant to be paired two-per-score; confirm the page layout before
        # turning these prints into yielded items.
        print(scores)
        print(len(scores))
        print(len(team_nodes))
        print(len(team_names))
        print(team_names)
|