|
|
@@ -1,137 +0,0 @@
|
|
|
-import datetime
|
|
|
-import re
|
|
|
-
|
|
|
-import scrapy
|
|
|
-
|
|
|
-from ..items import Hgsaiguo
|
|
|
-
|
|
|
-
|
|
|
class HgjieshuSpider(scrapy.Spider):
    """Scrape match results from the hg3535 sportsbook results pages.

    One request is issued for each page number ``pt`` from 1 to 4. Every
    response is reduced to ``(match id, result)`` pairs, and each pair whose
    result is non-empty is emitted as an ``Hgsaiguo`` item.
    """

    name = 'other_saiguo'
    # Evaluated once, when the class body runs; only referenced by the
    # disabled LOG_FILE setting below.
    to_day = datetime.datetime.now()
    # NOTE(review): the requested URLs are on hg3535.cn, not on this domain.
    # They presumably get through only because every request sets
    # dont_filter=True -- confirm which domain is intended.
    allowed_domains = ['hg3535z.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            'hg3535.pipeline.saiguo.Jieshuqiupipeline': 300,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': "../hg3535/log/saiguo{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }

    # Results page; the placeholder receives the page number ``pt``.
    _RESULTS_URL = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/1'
    # A score such as "2-1" or "101-99".
    _SCORE_RE = re.compile(r"\d{1,3}-\d{1,3}")
    # ``id`` attribute of the first cell of every match row.
    _MATCH_ID_XPATH = '//div[@class="flex-wrap"]/../div[1]/@id'
    # Fifth cell of every match row; its markup is searched for a score.
    _SCORE_CELL_XPATH = '//div[@class="flex-wrap"]/../div[5]'
    # ``title`` of the fourth cell: the winner of the match, which is used to
    # judge whether the match has finished.
    _WINNER_TITLE_XPATH = '//div[@class="flex-wrap"]/../div[4]/@title'
    # pt -> (XPath selecting the result nodes, whether to regex out a score).
    _RESULT_SOURCES = {
        1: (_SCORE_CELL_XPATH, True),
        2: (_SCORE_CELL_XPATH, True),
        3: (_WINNER_TITLE_XPATH, False),
        4: (_SCORE_CELL_XPATH, True),
    }

    def start_requests(self):
        """Yield one request per results page number (1 through 4).

        The page number travels in ``meta['pt']`` so that ``parse`` knows
        which extraction rule applies to the response.
        """
        for pt in range(1, 5):
            yield scrapy.Request(
                url=self._RESULTS_URL.format(pt),
                callback=self.parse,
                dont_filter=True,
                meta={'pt': pt},
            )

    def parse(self, response):
        """Yield an ``Hgsaiguo`` item for every match that has a result.

        Args:
            response: the downloaded results page; ``response.meta['pt']``
                must hold the page number set by ``start_requests``.

        Yields:
            ``Hgsaiguo`` items with ``id_score`` set to a
            ``(match id, result)`` tuple and ``pt`` set to the page number.
            Nothing is yielded for non-200 responses, unknown page numbers,
            or pages where fewer results than match ids were found.

        Raises:
            KeyError: if a 200 response carries no ``pt`` in its meta.
        """
        if response.status != 200:
            return

        pt = response.meta['pt']
        source = self._RESULT_SOURCES.get(pt)
        if source is None:
            return
        result_xpath, needs_score_regex = source

        # All match ids on the page, with the 'e-' marker stripped.
        match_ids = [
            node.extract().replace('e-', "")
            for node in response.xpath(self._MATCH_ID_XPATH)
        ]
        # One result per selected node; "" means no result could be read.
        results = [
            self._extract_result(node.extract(), needs_score_regex)
            for node in response.xpath(result_xpath)
        ]

        # Ids and results are selected independently and paired by position,
        # so a missing result cannot be matched to its id reliably.
        if len(results) < len(match_ids):
            self.logger.error(
                "pt=%s: found %d results for %d match ids on %s; skipping page",
                pt, len(results), len(match_ids), response.url,
            )
            return

        # (match id, result) pairs, de-duplicated while keeping page order.
        for id_score in dict.fromkeys(zip(match_ids, results)):
            if not id_score[1]:
                continue
            item = Hgsaiguo()
            item['id_score'] = id_score
            item['pt'] = pt
            yield item

    @classmethod
    def _extract_result(cls, text, needs_score_regex):
        """Return the result contained in ``text``, or "" if there is none.

        When ``needs_score_regex`` is true the first score-shaped substring
        (for example "2-1") is returned; otherwise ``text`` is returned
        unchanged.
        """
        if not needs_score_regex:
            return text
        found = cls._SCORE_RE.search(text)
        return found.group(0) if found else ""
|