@@ -4,7 +4,8 @@ import re
 import scrapy
 from lxml import etree
 
-# from ..items import Hgsaiguo
+from ..items import Hgsaiguo
+
 
 
 class HgjieshuSpider(scrapy.Spider):
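
The now-live import means `items.py` must define `Hgsaiguo`. A minimal sketch of a compatible definition; the field names are taken from the assignments in `parse()` below, and the comments are my reading of how they are used, not the author's:

# items.py (sketch)
import scrapy

class Hgsaiguo(scrapy.Item):
    league_id = scrapy.Field()
    league_name = scrapy.Field()
    match_id = scrapy.Field()
    match_date = scrapy.Field()
    match_time = scrapy.Field()
    home_team = scrapy.Field()
    guest_team = scrapy.Field()
    score_half = scrapy.Field()    # first half (football/basketball/baseball)
    score_full = scrapy.Field()    # full time (football/baseball)
    score_result = scrapy.Field()  # full time (basketball/tennis)
    score_below = scrapy.Field()   # basketball only
    play_data = scrapy.Field()     # list of {'play_name': ..., 'play_result': ...} dicts
    pt = scrapy.Field()            # sport: 1 football, 2 basketball, 3 tennis, 4 baseball
    page = scrapy.Field()          # results page: 1 or 2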
@@ -21,12 +22,19 @@ class HgjieshuSpider(scrapy.Spider):
 
     def start_requests(self):
         for y in range(1, 5):
-            url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/3'.format(y)
-            yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={'pt': y})
+            for z in range(1, 3):
+                url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/{}'.format(y, z)
+                yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={'pt': y, "page": z})
 
     def parse(self, response):
         if response.status == 200:
             pt = response.meta['pt']
+            page = response.meta['page']
+            if page == 1:
+                us_datetime = datetime.datetime.now() - datetime.timedelta(hours=12)
+            else:
+                us_datetime = datetime.datetime.now() - datetime.timedelta(hours=36)
+            match_date = us_datetime.strftime("%Y-%m-%d")
             # Football results
             if pt == 1:
                 league_ids = response.xpath('//div[@class="rt-l-bar football"]/@id').extract()
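
start_requests now fans out over a 4x2 grid: pt (1-4) selects the sport handled by the branches below, and page (1-2) selects the results page. The 12/36-hour offsets stamp each item with the calendar date that page is assumed to cover; note that parse() relies on a module-level datetime import, which sits outside this hunk. The heuristic, restated on its own (a sketch of the same logic, not new behavior):

import datetime

def result_date(page):
    # Page 1 is assumed to list matches from roughly the last 12 hours,
    # page 2 the day before that (hence the 36-hour offset).
    hours = 12 if page == 1 else 36
    return (datetime.datetime.now() - datetime.timedelta(hours=hours)).strftime("%Y-%m-%d")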
@@ -35,7 +43,6 @@ class HgjieshuSpider(scrapy.Spider):
                 league_id = league_ids[index]
                 league_name = league_names[index]
                 response_data = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
-                # response_data = response.xpath('//div[@id="dt-{}"]'.format('cmp-36254')).extract_first()
                 data = etree.HTML(response_data)
                 # Team names
                 team_names = data.xpath('//div[@class="rt-event"]/@title')
@@ -63,14 +70,14 @@ class HgjieshuSpider(scrapy.Spider):
                     h_name = team_name[0]
                     # Away team
                     a_name = team_name[1]
-                    print(h_name, a_name)
                     # First-half score
                     h_score = h_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Full-time score
                     f_score = f_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Regex for extracting the match time
                     pattern = re.compile(r"\d{1,3}:\d{1,3}")
-                    stime = pattern.findall(stimes[y])[0]
+                    match_time = pattern.findall(stimes[y])[0]
+                    play_datas = []
                     if odd_names:
                         for i in range(len(odd_names)):
                             name = odd_names[i].text
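
The renamed match_time comes from applying the \d{1,3}:\d{1,3} pattern to the raw cell text. A quick illustration with a made-up cell value:

import re

pattern = re.compile(r"\d{1,3}:\d{1,3}")
print(pattern.findall("  09:30\r\n")[0])  # -> '09:30' (input string is illustrative)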
@@ -79,8 +86,21 @@ class HgjieshuSpider(scrapy.Spider):
                                     play = '{}&&{}'.format(plays[0], plays[1])
                                 else:
                                     play = plays[0]
-                    else:
-                        pass
+                            play_datas.append({'play_name': name, 'play_result': play})
+                    item = Hgsaiguo()
+                    item["league_id"] = league_id
+                    item["league_name"] = league_name
+                    item["match_id"] = match_id
+                    item["match_date"] = match_date
+                    item["match_time"] = match_time
+                    item["home_team"] = h_name
+                    item["guest_team"] = a_name
+                    item["score_half"] = h_score
+                    item["score_full"] = f_score
+                    item["play_data"] = play_datas
+                    item["pt"] = pt
+                    item["page"] = page
+                    yield item
 
             # Basketball results
             if pt == 2:
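
Each football item yielded above ends up shaped roughly like this (all values illustrative; the league id format is taken from the debug line removed earlier, and match_id is set in context lines this diff does not show):

{
    'league_id': 'cmp-36254',
    'league_name': '...',
    'match_id': '...',
    'match_date': '2019-01-01',
    'match_time': '09:30',
    'home_team': '...',
    'guest_team': '...',
    'score_half': '...',
    'score_full': '...',
    'play_data': [{'play_name': '...', 'play_result': '...'}],
    'pt': 1,
    'page': 1,
}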
@@ -90,7 +110,6 @@ class HgjieshuSpider(scrapy.Spider):
                 league_id = league_ids[index]
                 league_name = league_names[index]
                 response_data = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
-                # response_data = response.xpath('//div[@id="dt-{}"]'.format('cmp-26405')).extract_first()
                 data = etree.HTML(response_data)
                 # Team names
                 team_names = data.xpath('//div[@class="rt-event"]/@title')
@@ -114,7 +133,6 @@ class HgjieshuSpider(scrapy.Spider):
                     h_name = team_name[0]
                     # Away team
                     a_name = team_name[1]
-                    print(h_name, a_name)
                     # First-half score
                     h_score = h_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Full-time score
@@ -123,25 +141,47 @@ class HgjieshuSpider(scrapy.Spider):
                     x_score = x_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Regex for extracting the match time
                     pattern = re.compile(r"\d{1,3}:\d{1,3}")
-                    stime = pattern.findall(stimes[y])[0]
+                    match_time = pattern.findall(stimes[y])[0]
+                    play_datas = []
                     if child_data:
+                        h_dict = {'team_name': h_name}
+                        a_dict = {'team_name': a_name}
                         for i in range(len(child_data)):
                             if i == 0:
                                 h_datas = child_data[i].xpath('.//td/table/tbody/tr[3]/td[@class="r-odds"]')
                                 a_datas = child_data[i].xpath('.//td/table/tbody/tr[4]/td[@class="r-odds"]')
+                                rule = {0: "sc_1th", 1: "sc_2th", 2: "sc_3th", 3: "sc_4th", 4: "sc_other"}
                                 if h_datas and a_datas:
                                     for x in range(len(h_datas)):
                                         # Home team period score
                                         h_data = h_datas[x].text.replace(' ', '').replace('\r\n', '')
+                                        h_dict[rule[x]] = h_data
                                         # Away team period score
                                         a_data = a_datas[x].text.replace(' ', '').replace('\r\n', '')
+                                        a_dict[rule[x]] = a_data
                             else:
                                 # Sub-market name
                                 child_name = child_data[i].xpath('.//td[contains(@class, "r-bt ")]/text()')[0].replace(' ', '').replace('\r\n', '')
                                 # Sub-market result
                                 child_play = child_data[i].xpath('.//td[@class="r-odds"]/span[@class="prop"]/text()')[0]
-                    else:
-                        pass
+                                play_datas.append({"play_name": child_name, "play_result": child_play})
+                        play_datas.append(h_dict)
+                        play_datas.append(a_dict)
+                    item = Hgsaiguo()
+                    item["league_id"] = league_id
+                    item["league_name"] = league_name
+                    item["match_id"] = match_id
+                    item["match_date"] = match_date
+                    item["match_time"] = match_time
+                    item["home_team"] = h_name
+                    item["guest_team"] = a_name
+                    item["score_half"] = h_score
+                    item["score_result"] = f_score
+                    item["play_data"] = play_datas
+                    item["pt"] = pt
+                    item["page"] = page
+                    item["score_below"] = x_score
+                    yield item
 
             # Tennis results
             if pt == 3:
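
For basketball, the rule dict maps each r-odds cell index to a period key, so h_dict and a_dict accumulate per-team period scores and land in play_data next to the sub-market entries. One caveat: rule[x] raises KeyError if a row ever carries more than five score cells. A defensive variant (a suggestion, not what the patch does):

key = rule.get(x, "sc_extra_{}".format(x))  # fall back instead of raising KeyError
h_dict[key] = h_data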
@@ -151,7 +191,6 @@ class HgjieshuSpider(scrapy.Spider):
                 league_id = league_ids[index]
                 league_name = league_names[index]
                 response_data = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
-                # response_data = response.xpath('//div[@id="dt-{}"]'.format('cmp-26405')).extract_first()
                 data = etree.HTML(response_data)
                 # Team names
                 team_names = data.xpath('//div[@class="rt-event"]/@title')
@@ -173,13 +212,16 @@ class HgjieshuSpider(scrapy.Spider):
                     h_name = team_name[0]
                     # Away team
                     a_name = team_name[1]
-                    print(h_name, a_name)
                     # Full-time score
                     f_score = f_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Regex for extracting the match time
                     pattern = re.compile(r"\d{1,3}:\d{1,3}")
-                    stime = pattern.findall(stimes[y])[0]
+                    match_time = pattern.findall(stimes[y])[0]
+                    play_datas = []
                     if child_data:
+                        rule = {0: "sc_1th", 1: "sc_2th", 2: "sc_3th", 3: "sc_4th", 4: "sc_5th", 5: "game_num", 6: "disc_num"}
+                        h_dict = {'team_name': h_name}
+                        a_dict = {'team_name': a_name}
                         for i in range(len(child_data)):
                             if i == 0:
                                 h_datas = child_data[i].xpath('.//tbody/tr[3]/td[contains(@class, "r-odds")]')
@@ -188,17 +230,32 @@ class HgjieshuSpider(scrapy.Spider):
                                     for x in range(len(h_datas)):
                                         # Home team period score
                                         h_data = h_datas[x].text.replace(' ', '').replace('\r\n', '')
+                                        h_dict[rule[x]] = h_data
                                         # Away team period score
                                         a_data = a_datas[x].text.replace(' ', '').replace('\r\n', '')
+                                        a_dict[rule[x]] = a_data
                             else:
                                 # Sub-market name
                                 child_name = child_data[i].xpath('.//td[contains(@class, "r-bt ")]/text()')[0].replace(' ', '').replace('\r\n', '')
                                 # Sub-market result
                                 child_play = child_data[i].xpath('.//td[@class="r-odds"]/span[@class="prop"]')[0]
                                 play = child_play.xpath('string(.)')
-                                print(child_name, play)
-                    else:
-                        pass
+                                play_datas.append({"play_name": child_name, "play_result": play})
+                        play_datas.append(h_dict)
+                        play_datas.append(a_dict)
+                    item = Hgsaiguo()
+                    item["league_id"] = league_id
+                    item["league_name"] = league_name
+                    item["match_id"] = match_id
+                    item["match_date"] = match_date
+                    item["match_time"] = match_time
+                    item["home_team"] = h_name
+                    item["guest_team"] = a_name
+                    item["score_result"] = f_score
+                    item["play_data"] = play_datas
+                    item["pt"] = pt
+                    item["page"] = page
+                    yield item
 
             # Baseball results
             if pt == 4:
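
The tennis branch widens the same mapping to seven columns: five sets plus the game and disc totals. A per-team entry in play_data then looks roughly like this (values illustrative):

{'team_name': '...', 'sc_1th': '6', 'sc_2th': '4', 'sc_3th': '6',
 'sc_4th': '', 'sc_5th': '', 'game_num': '16', 'disc_num': '2'}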
@@ -208,7 +265,6 @@ class HgjieshuSpider(scrapy.Spider):
                 league_id = league_ids[index]
                 league_name = league_names[index]
                 response_data = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
-                # response_data = response.xpath('//div[@id="dt-{}"]'.format('cmp-26753')).extract_first()
                 data = etree.HTML(response_data)
                 # Team names
                 team_names = data.xpath('//div[@class="rt-event"]/@title')
@@ -236,19 +292,32 @@ class HgjieshuSpider(scrapy.Spider):
                     h_name = team_name[0]
                     # Away team
                     a_name = team_name[1]
-                    print(h_name, a_name)
                     # First-half score
                     h_score = h_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Full-time score
                     f_score = f_scores[y].xpath('string(.)').replace(' ', '').replace('\r\n', '')
                     # Regex for extracting the match time
                     pattern = re.compile(r"\d{1,3}:\d{1,3}")
-                    stime = pattern.findall(stimes[y])[0]
+                    match_time = pattern.findall(stimes[y])[0]
+                    play_datas = []
                     if odd_names:
                         for i in range(len(odd_names)):
                             # Sub-market name
                             name = odd_names[i].text.replace(' ', '').replace('\r\n', '')
                             # Sub-market result
                             play = odd_plays[i].xpath('string(.)').replace(' ', '').replace('\r\n', '')
-                    else:
-                        pass
+                            play_datas.append({"play_name": name, "play_result": play})
+                    item = Hgsaiguo()
+                    item["league_id"] = league_id
+                    item["league_name"] = league_name
+                    item["match_id"] = match_id
+                    item["match_date"] = match_date
+                    item["match_time"] = match_time
+                    item["home_team"] = h_name
+                    item["guest_team"] = a_name
+                    item["score_half"] = h_score
+                    item["score_full"] = f_score
+                    item["play_data"] = play_datas
+                    item["pt"] = pt
+                    item["page"] = page
+                    yield item
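
Since every item now carries pt and page, downstream code can tell the four sports apart without re-parsing anything. A minimal pipeline sketch (the class and SPORT mapping are assumptions, not part of the patch):

# pipelines.py (sketch)
class HgsaiguoPipeline:
    SPORT = {1: 'football', 2: 'basketball', 3: 'tennis', 4: 'baseball'}

    def process_item(self, item, spider):
        spider.logger.debug('%s: %s vs %s (%s %s)',
                            self.SPORT.get(item['pt'], '?'),
                            item['home_team'], item['guest_team'],
                            item['match_date'], item['match_time'])
        return item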