|
@@ -12,7 +12,7 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
allowed_domains = ['m.hgg070.com']
|
|
allowed_domains = ['m.hgg070.com']
|
|
|
custom_settings = {
|
|
custom_settings = {
|
|
|
"ITEM_PIPELINES": {
|
|
"ITEM_PIPELINES": {
|
|
|
- # "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
|
|
|
|
|
|
|
+ "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
|
|
|
},
|
|
},
|
|
|
# 'LOG_LEVEL': 'DEBUG',
|
|
# 'LOG_LEVEL': 'DEBUG',
|
|
|
# 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
|
|
# 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
|
|
@@ -35,12 +35,11 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
def start_requests(self):
|
|
def start_requests(self):
|
|
|
url = "http://m.hgg070.com/app/member/get_league_list.php"
|
|
url = "http://m.hgg070.com/app/member/get_league_list.php"
|
|
|
h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
|
|
h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
|
|
|
- # for h_type in h_types:
|
|
|
|
|
for i, h_type in enumerate(h_types):
|
|
for i, h_type in enumerate(h_types):
|
|
|
show_type, isp, length = h_type
|
|
show_type, isp, length = h_type
|
|
|
self.headers['Content-Length'] = length
|
|
self.headers['Content-Length'] = length
|
|
|
from_data = {
|
|
from_data = {
|
|
|
- 'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
|
|
|
|
|
|
|
+ 'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
|
|
|
'langx': 'zh-cn',
|
|
'langx': 'zh-cn',
|
|
|
'ltype': '3',
|
|
'ltype': '3',
|
|
|
'gtype': 'FT',
|
|
'gtype': 'FT',
|
|
@@ -50,28 +49,32 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
'isP': isp
|
|
'isP': isp
|
|
|
}
|
|
}
|
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
|
|
|
- meta={'showtype': show_type, 'isp': isp, 'index': i}, dont_filter=True)
|
|
|
|
|
|
|
+ meta={'index': i}, dont_filter=True)
|
|
|
|
|
|
|
|
def parse(self, response):
|
|
def parse(self, response):
|
|
|
leagues = response.xpath('//serverresponse/game/league')
|
|
leagues = response.xpath('//serverresponse/game/league')
|
|
|
url = 'http://m.hgg070.com/app/member/get_game_list.php'
|
|
url = 'http://m.hgg070.com/app/member/get_game_list.php'
|
|
|
if leagues:
|
|
if leagues:
|
|
|
- showtype = response.meta['showtype']
|
|
|
|
|
- isp = response.meta['isp']
|
|
|
|
|
index = response.meta['index']
|
|
index = response.meta['index']
|
|
|
if index == 0:
|
|
if index == 0:
|
|
|
date = ''
|
|
date = ''
|
|
|
|
|
+ showtype = 'FT'
|
|
|
|
|
+ isp = ''
|
|
|
self.headers['Content-Length'] = '147'
|
|
self.headers['Content-Length'] = '147'
|
|
|
elif index == 2:
|
|
elif index == 2:
|
|
|
date = 'all'
|
|
date = 'all'
|
|
|
|
|
+ showtype = 'FU'
|
|
|
|
|
+ isp = ''
|
|
|
self.headers['Content-Length'] = '150'
|
|
self.headers['Content-Length'] = '150'
|
|
|
else:
|
|
else:
|
|
|
date = 'all'
|
|
date = 'all'
|
|
|
|
|
+ showtype = 'FU'
|
|
|
|
|
+ isp = 'P'
|
|
|
self.headers['Content-Length'] = '151'
|
|
self.headers['Content-Length'] = '151'
|
|
|
for league in leagues:
|
|
for league in leagues:
|
|
|
lid = league.xpath('.//league_id/text()').extract_first()
|
|
lid = league.xpath('.//league_id/text()').extract_first()
|
|
|
from_data = {
|
|
from_data = {
|
|
|
- 'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
|
|
|
|
|
|
|
+ 'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
|
|
|
'langx': 'zh-cn',
|
|
'langx': 'zh-cn',
|
|
|
'ltype': '3',
|
|
'ltype': '3',
|
|
|
'gtype': 'FT',
|
|
'gtype': 'FT',
|
|
@@ -82,20 +85,13 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
'isP': isp
|
|
'isP': isp
|
|
|
}
|
|
}
|
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
|
|
|
- meta={'showtype': showtype, 'isp': isp, 'index': index}, dont_filter=True)
|
|
|
|
|
|
|
+ meta={'index': index}, dont_filter=True)
|
|
|
else:
|
|
else:
|
|
|
print('未获取到联赛id')
|
|
print('未获取到联赛id')
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
def parse_match(self, response):
|
|
def parse_match(self, response):
|
|
|
- # showtype = response.meta['showtype']
|
|
|
|
|
- # isp = response.meta['isp']
|
|
|
|
|
index = response.meta['index']
|
|
index = response.meta['index']
|
|
|
- if response.status == 400:
|
|
|
|
|
- print(response.status)
|
|
|
|
|
- # print(showtype, isp)
|
|
|
|
|
- print('parse_odds', response.url)
|
|
|
|
|
- pass
|
|
|
|
|
url = 'http://m.hgg070.com/app/member/get_game_more.php'
|
|
url = 'http://m.hgg070.com/app/member/get_game_more.php'
|
|
|
if index == 0:
|
|
if index == 0:
|
|
|
date = ''
|
|
date = ''
|
|
@@ -117,25 +113,12 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
showtype = 'FU'
|
|
showtype = 'FU'
|
|
|
isp = 'P'
|
|
isp = 'P'
|
|
|
self.headers['Content-Length'] = '136'
|
|
self.headers['Content-Length'] = '136'
|
|
|
- # showtype = response.meta['showtype']
|
|
|
|
|
- # isp = response.meta['isp']
|
|
|
|
|
- # if showtype == 'FT' and isp == '':
|
|
|
|
|
- # date = ''
|
|
|
|
|
- # self.headers['Content-Length'] = '132'
|
|
|
|
|
- # elif showtype == 'FU' and isp == 'P':
|
|
|
|
|
- # date = 'all'
|
|
|
|
|
- # self.headers['Content-Length'] = '136'
|
|
|
|
|
- # elif showtype == 'FU' and isp == '':
|
|
|
|
|
- # date = ''
|
|
|
|
|
- # self.headers['Content-Length'] = '132'
|
|
|
|
|
- # else:
|
|
|
|
|
- # date = 'all'
|
|
|
|
|
- # self.headers['Content-Length'] = '136'
|
|
|
|
|
gids = response.xpath('//serverresponse/game/gid/text()').extract()
|
|
gids = response.xpath('//serverresponse/game/gid/text()').extract()
|
|
|
|
|
+ tags = response.xpath('//serverresponse/game/more_count/text()').extract()
|
|
|
if gids:
|
|
if gids:
|
|
|
- for gid in gids:
|
|
|
|
|
|
|
+ for i, gid in enumerate(gids):
|
|
|
from_data = {
|
|
from_data = {
|
|
|
- 'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
|
|
|
|
|
|
|
+ 'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
|
|
|
'langx': 'zh-cn',
|
|
'langx': 'zh-cn',
|
|
|
'ltype': '3',
|
|
'ltype': '3',
|
|
|
'gtype': 'FT',
|
|
'gtype': 'FT',
|
|
@@ -144,14 +127,17 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
'isP': isp,
|
|
'isP': isp,
|
|
|
'gid': gid,
|
|
'gid': gid,
|
|
|
}
|
|
}
|
|
|
|
|
+ tag = tags[i]
|
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
|
|
|
- meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
|
|
|
|
|
|
|
+ meta={'index': index, 'tag': tag}, dont_filter=True)
|
|
|
|
|
|
|
|
def parse_odds(self, response):
|
|
def parse_odds(self, response):
|
|
|
- if response.status == 400:
|
|
|
|
|
- print(response.status)
|
|
|
|
|
- print('parse_odds', response.url)
|
|
|
|
|
- game = response.xpath('//serverresponse/game')[0]
|
|
|
|
|
|
|
+ index = response.meta['index']
|
|
|
|
|
+ tag = response.meta['tag']
|
|
|
|
|
+ try:
|
|
|
|
|
+ game = response.xpath('//serverresponse/game')[0]
|
|
|
|
|
+ except:
|
|
|
|
|
+ return
|
|
|
logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
|
if game:
|
|
if game:
|
|
|
game_odds = {}
|
|
game_odds = {}
|
|
@@ -164,8 +150,9 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
else:
|
|
else:
|
|
|
game_odds[i.tag] = i.text
|
|
game_odds[i.tag] = i.text
|
|
|
else:
|
|
else:
|
|
|
- pass
|
|
|
|
|
- # logger.info('gopen == "N", 详细赔率盘口未开启')
|
|
|
|
|
|
|
+ logger.info('gopen == "N", 详细赔率盘口未开启')
|
|
|
item = ZuqiuItem()
|
|
item = ZuqiuItem()
|
|
|
- item['all'] = game_odds
|
|
|
|
|
|
|
+ item['data'] = game_odds
|
|
|
|
|
+ item['index'] = index
|
|
|
|
|
+ item['tag'] = tag
|
|
|
yield item
|
|
yield item
|