| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- # -*- coding: utf-8 -*-
- # import re
- import datetime
- import re
- import scrapy
- from ..items import Zujieguo
class HgjieshuSpider(scrapy.Spider):
    """Spider that collects finished-match scores from the results pages.

    It requests two fixed results URLs and, for every result row found,
    yields a ``Zujieguo`` item whose ``id_score`` field is a
    ``(match_id, score)`` tuple, e.g. ``("123456", "2-1")``.
    Rows without a recognisable score are skipped.
    """

    name = 'zq_jieshu'
    # Evaluated once when the class body runs (import time); only used to
    # date-stamp the log file name in ``custom_settings`` below.
    to_day = datetime.datetime.now()
    allowed_domains = ['hg3535z.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            'hg3535.pipelines.Zujieshuqiupipeline': 300,
        },
        'LOG_LEVEL': 'DEBUG',
        'LOG_FILE': "../hg3535/log/zq_jieshu_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day),
    }

    # Score text looks like "<1-3 digits>-<1-3 digits>"; compiled once
    # instead of on every loop iteration.
    _SCORE_RE = re.compile(r"\d{1,3}-\d{1,3}")

    def start_requests(self):
        """Yield one request per results page.

        ``dont_filter=True`` bypasses Scrapy's duplicate filter so the same
        URLs are fetched again on every run.
        """
        urls = [
            'https://hg3535z.com/zh-cn/info-centre/sportsbook-info/results/1/normal/1',
            'https://hg3535z.com/zh-cn/info-centre/sportsbook-info/results/1/normal/2',
        ]
        for url in urls:
            yield scrapy.Request(url, callback=self.parse, dont_filter=True)

    def parse(self, response):
        """Extract ``(match_id, score)`` pairs from a results page.

        Each result row is the parent of a ``div.flex-wrap`` element. The
        match id is the ``id`` attribute of the row's first ``div`` (with the
        ``e-`` prefix removed) and the score is the first ``N-N`` pattern
        found in the markup of the row's fifth ``div``.

        Both fields are read from the same row node, so a row that is missing
        either one is skipped instead of shifting every following id/score
        pairing (or raising ``IndexError``), which is what pairing two
        independently extracted lists by index would do.

        Yields:
            Zujieguo: one item per distinct ``(match_id, score)`` pair.
        """
        if response.status != 200:
            return

        seen = set()  # de-duplicate identical (id, score) pairs
        for row in response.xpath('//div[@class="flex-wrap"]/..'):
            raw_id = row.xpath('./div[1]/@id').extract_first()
            score_html = row.xpath('./div[5]').extract_first()
            if raw_id is None or score_html is None:
                continue

            match = self._SCORE_RE.search(score_html)
            if match is None:
                # No score in this row; nothing to report for it.
                continue

            pair = (raw_id.replace('e-', ""), match.group(0))
            if pair in seen:
                continue
            seen.add(pair)

            item = Zujieguo()
            item['id_score'] = pair
            yield item

        self.logger.debug(
            "extracted %d id/score pairs from %s", len(seen), response.url
        )
|