@@ -3,25 +3,27 @@ import scrapy
 from ..items import LanqiuItem
 import copy
 import lxml.etree
-import re,os,json
+import re, os, json
 from ..utils.helper import Helper
 import time
 from ..items import LanqiuItem
 import xmltodict
+

 class LqSportsSpider(scrapy.Spider):
     name = 'lq_sports'
     allowed_domains = ['m.hgg070.com/']
     start_urls = ['http://m.hgg070.com//']
     remath = re.compile("篮球")
+
     # custom_settings={
     #     "ITEM_PIPELINES": {
     #         "hgg070_spider.pipelines.lq_sports.LqSportsPipeline": 200,
     #     },
     # }

     def start_requests(self):
-        #today's and early markets
-        h_types=[('FT'),('FU')]
+        # today's ('FT') and early ('FU') markets
+        h_types = ['FT', 'FU']
         headers = {
             'Accept': '*/*',
             'Accept-Encoding': 'gzip, deflate',
@@ -38,7 +40,7 @@ class LqSportsSpider(scrapy.Spider):
         url = "http://m.hgg070.com/app/member/get_league_list.php"
         for item in h_types:
             showtype = item
-            data={
+            data = {
                 'uid': '3970335d20df9b8ceca8673ae9b6ea910c912492f595c0ef163623ae0ea883b6',
                 'langx': 'zh-cn',
                 'ltype': '3',
@@ -48,50 +50,51 @@ class LqSportsSpider(scrapy.Spider):
                 'date': '',
                 'isP': ''
             }
-            yield scrapy.FormRequest(url=url,formdata=data,callback=self.parse,headers=headers,
-                                     meta={"data":data}, dont_filter=True)
+            yield scrapy.FormRequest(url=url, formdata=data, callback=self.parse, headers=headers,
+                                     meta={"data": data}, dont_filter=True)

     def parse(self, response):
-        #get each league id and decide which market type to crawl
-        data=response.meta["data"]
-        fromdata=copy.deepcopy(data)
-        league=response.xpath('//league')
-        url="http://m.hgg070.com/app/member/get_game_list.php"
+        # get each league id and decide which market type to crawl
+        data = response.meta["data"]
+        fromdata = copy.deepcopy(data)
+        league = response.xpath('//league')
+        url = "http://m.hgg070.com/app/member/get_game_list.php"
         for le in league:
-            name=le.xpath('./league_name/text()').extract_first()
-            if len(self.remath.findall(name))>0:
+            name = le.xpath('./league_name/text()').extract_first()
+            if len(self.remath.findall(name)) > 0:
                 lid = le.xpath('./league_id/text()').extract_first()
                 # crawl today's markets
-                if data["showtype"]=="FT":
-                    data['lid'],data['sorttype'],data['date']=lid,'league',''
+                if data["showtype"] == "FT":
+                    data['lid'], data['sorttype'], data['date'] = lid, 'league', ''
                 # crawl early markets
-                elif data["showtype"]=="FU":
+                elif data["showtype"] == "FU":
                     data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
-                yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
+                yield scrapy.FormRequest(url=url, formdata=data, callback=self.detailball, meta={"data": fromdata},
+                                         dont_filter=True)

-    def detailball(self,response):
-        data=response.meta["data"]
-        url="http://m.hgg070.com/app/member/get_game_more.php"
-        #get the gid of each game
-        game=response.xpath("//game")
+    def detailball(self, response):
+        data = response.meta["data"]
+        url = "http://m.hgg070.com/app/member/get_game_more.php"
+        # get the gid of each game
+        game = response.xpath("//game")
         for g in game:
-            gid=g.xpath("./gid/text()").extract_first()
+            gid = g.xpath("./gid/text()").extract_first()
             more_count = g.xpath("./more_count/text()").extract_first()
-            data["gid"]=gid
-            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,meta={"more_count":more_count,"isP":data["isP"]},dont_filter=True)
-
+            data["gid"] = gid
+            yield scrapy.FormRequest(url=url, formdata=data, callback=self.getItem,
+                                     meta={"more_count": more_count, "isP": data["isP"]}, dont_filter=True)

-    def getItem(self,response):
+    def getItem(self, response):
         more_count = response.meta["more_count"]
         isP = response.meta["isP"]
-        data= xmltodict.parse(response.text)['serverresponse']['game']
-        game_lists=[i for i in data if i['gopen']=='Y']
+        data = xmltodict.parse(response.text)['serverresponse']['game']
+        game_lists = [i for i in data if i['gopen'] == 'Y']

         if game_lists:
             for gl in game_lists:
-                cpath=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-                with open(cpath+"/conf/hgg070.json",encoding='utf8') as hg:
-                    hgg=json.load(hg)['bk']
+                cpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+                with open(cpath + "/conf/hgg070.json", encoding='utf8') as hg:
+                    hgg = json.load(hg)['bk']
                 datetime = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
                 team_h = gl['team_h']
                 team_c = gl['team_c']
@@ -100,16 +103,17 @@ class LqSportsSpider(scrapy.Spider):
                 match_uid = Helper.genearte_uuid(team_h + team_c + datetime)
                 data = []
                 for hg in hgg:
-                    items=hg['items']
-                    if gl[hg['prodds']]=='Y':
+                    items = hg['items']
+                    if gl[hg['prodds']] == 'Y':
                         for x in items:
                             odds_code = gl[x['rodds']]
                             p_code = gl[hg['prodds']]
-                            odds=gl["ior_OUH"]
-                            #two conditions (over and under), so append two records
-                            if x['ratio_name']: #over
-                                condition_u=gl[x['ratio_name']]
-                                odds_only = hg["plodds"] + x["lodds"] + '0' + condition_u + str(odds) + "hg3535" + str(match_id)
+                            odds = gl["ior_OUH"]
+                            # two conditions (over and under), so append two records
+                            if x['ratio_name']:  # over
+                                condition_u = gl[x['ratio_name']]
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + condition_u + str(odds) + "hg3535" + str(
+                                    match_id)
                                 sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
                                 tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
                                         "sort": 0, "p_code": p_code,
@@ -117,23 +121,25 @@ class LqSportsSpider(scrapy.Spider):
                                         "source": "hgg070", "type": 0, "team": ""}
                                 data.append(tobj)

-                            if x['latio']: #under
+                            if x['latio']:  # under
                                 condition_s = gl[x['latio']]
-                                odds_only =hg["plodds"] + x["lodds"] + '0' +condition_s + str(odds) + "hg3535" + str(match_id)
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + condition_s + str(odds) + "hg3535" + str(
+                                    match_id)
                                 sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
                                 tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
                                         "sort": 0, "p_code": p_code,
-                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
+                                        "odds": odds, "condition": condition_s, "odds_only": odds_only, "sole": sole,
                                         "source": "hgg070", "type": 0, "team": ""}
                                 data.append(tobj)

                             if not x['latio'] and not x['ratio_name']:
                                 condition_s = ''
-                                odds_only = hg["plodds"] + x["lodds"] + '0' +condition_s + str(odds) + "hg3535" + str(match_id)
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + condition_s + str(odds) + "hg3535" + str(
+                                    match_id)
                                 sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
                                 tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
                                         "sort": 0, "p_code": p_code,
-                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
+                                        "odds": odds, "condition": condition_s, "odds_only": odds_only, "sole": sole,
                                         "source": "hgg070", "type": 0, "team": ""}
                                 data.append(tobj)

@@ -150,5 +156,5 @@ class LqSportsSpider(scrapy.Spider):
             item['team_h'] = team_h
             item['team_c'] = team_c
             item['isP'] = isP
-            print('wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww',item)
+            self.logger.debug("scraped odds item: %s", item)
             yield item
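
A note on the xmltodict call in getItem: xmltodict returns a list for <game> only when the response contains more than one of them. A single-game response parses to a plain dict, and the game_lists comprehension then iterates the dict's keys and raises. A defensive variant is sketched below; it is a minimal example that assumes nothing beyond the element names the spider already uses:

    import xmltodict

    xml = "<serverresponse><game><gid>1</gid><gopen>Y</gopen></game></serverresponse>"
    # force_list makes <game> parse as a list even when the XML holds a
    # single element, the case the current comprehension would crash on
    doc = xmltodict.parse(xml, force_list=('game',))
    games = doc['serverresponse']['game']
    open_games = [g for g in games if g.get('gopen') == 'Y']
    print(open_games)  # the one open game survives the filter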
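
For reviewers: getItem is driven entirely by conf/hgg070.json. It loads the 'bk' (basketball) section, matching the remath filter, which keeps only leagues whose name contains "篮球" (basketball), and for each market group reads 'prodds' (the game field that must be 'Y' for the market to be open), 'plodds' (a market prefix), and an 'items' list whose entries name the odds-code field ('rodds'), a line-type code ('lodds'), and the over/under handicap fields ('ratio_name', 'latio'). The config file is not part of this diff, so the snippet below is only a sketch of the shape the code implies: the key names come from the spider, every value is hypothetical.

    # assumed shape of one entry in conf/hgg070.json["bk"] (values invented)
    HGG070_BK_SAMPLE = [
        {
            "prodds": "sw_OU",       # game field that gates the market ('Y' = open)
            "plodds": "OU",          # market prefix used in odds_only / sole
            "items": [
                {
                    "rodds": "ior_OUC",       # game field read as odds_code
                    "lodds": "H",             # line-type code appended to the prefix
                    "ratio_name": "ratio_o",  # game field holding the over line, '' if none
                    "latio": "ratio_u",       # game field holding the under line, '' if none
                }
            ],
        }
    ]

    # a fake <game> record as xmltodict would return it, reduced to the
    # fields the sample entry touches
    game = {"sw_OU": "Y", "ior_OUC": "C075", "ior_OUH": "1.95",
            "ratio_o": "185.5", "ratio_u": "185.5"}

    match_id = 12345
    for hg in HGG070_BK_SAMPLE:
        if game[hg["prodds"]] == "Y":
            for x in hg["items"]:
                odds_code = game[x["rodds"]]
                odds = game["ior_OUH"]  # getItem hardcodes this field for every item
                condition_u = game[x["ratio_name"]]
                # the same concatenation getItem uses for its two dedup keys
                odds_only = hg["plodds"] + x["lodds"] + '0' + condition_u + str(odds) + "hg3535" + str(match_id)
                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
                print(odds_code, odds_only, sole)

Two things are worth checking against the real config: odds is hardcoded to gl["ior_OUH"] for every item, which only makes sense if the 'bk' section describes over/under markets exclusively, and match_id / league_id are referenced in these hunks but assigned in lines outside the diff.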