|
@@ -1,10 +1,23 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
+import logging
|
|
|
|
|
+import lxml
|
|
|
|
|
+
|
|
|
import scrapy
|
|
import scrapy
|
|
|
|
|
|
|
|
|
|
+from items import ZuqiuItem
|
|
|
|
|
+
|
|
|
|
|
|
|
|
class ZuqiuSpider(scrapy.Spider):
|
|
class ZuqiuSpider(scrapy.Spider):
|
|
|
name = 'zuqiu'
|
|
name = 'zuqiu'
|
|
|
allowed_domains = ['m.hgg070.com']
|
|
allowed_domains = ['m.hgg070.com']
|
|
|
|
|
+ custom_settings = {
|
|
|
|
|
+ "ITEM_PIPELINES": {
|
|
|
|
|
+ "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
|
|
|
|
|
+ },
|
|
|
|
|
+ # 'LOG_LEVEL': 'DEBUG',
|
|
|
|
|
+ # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
headers = {
|
|
headers = {
|
|
|
'Accept': '*/*',
|
|
'Accept': '*/*',
|
|
|
'Accept-Encoding': 'gzip, deflate',
|
|
'Accept-Encoding': 'gzip, deflate',
|
|
@@ -19,15 +32,15 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
|
|
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- # 读取今日足球
|
|
|
|
|
def start_requests(self):
|
|
def start_requests(self):
|
|
|
url = "http://m.hgg070.com/app/member/get_league_list.php"
|
|
url = "http://m.hgg070.com/app/member/get_league_list.php"
|
|
|
- h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', '', '131')]
|
|
|
|
|
|
|
+ h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
|
|
|
for h_type in h_types:
|
|
for h_type in h_types:
|
|
|
- show_type, isp, length = h_type
|
|
|
|
|
|
|
+ # show_type, isp, length = h_type
|
|
|
|
|
+ show_type, isp, length = h_types[3]
|
|
|
self.headers['Content-Length'] = length
|
|
self.headers['Content-Length'] = length
|
|
|
from_data = {
|
|
from_data = {
|
|
|
- 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
|
|
|
|
|
|
|
+ 'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
|
|
|
'langx': 'zh-cn',
|
|
'langx': 'zh-cn',
|
|
|
'ltype': '3',
|
|
'ltype': '3',
|
|
|
'gtype': 'FT',
|
|
'gtype': 'FT',
|
|
@@ -40,6 +53,9 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
|
|
meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
|
|
|
|
|
|
|
|
def parse(self, response):
|
|
def parse(self, response):
|
|
|
|
|
+ if response.status == 400:
|
|
|
|
|
+ print(response.status)
|
|
|
|
|
+ print('parse', response.url)
|
|
|
leagues = response.xpath('//serverresponse/game/league')
|
|
leagues = response.xpath('//serverresponse/game/league')
|
|
|
url = 'http://m.hgg070.com/app/member/get_game_list.php'
|
|
url = 'http://m.hgg070.com/app/member/get_game_list.php'
|
|
|
if leagues:
|
|
if leagues:
|
|
@@ -60,7 +76,7 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
for league in leagues:
|
|
for league in leagues:
|
|
|
lid = league.xpath('.//league_id/text()').extract_first()
|
|
lid = league.xpath('.//league_id/text()').extract_first()
|
|
|
from_data = {
|
|
from_data = {
|
|
|
- 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
|
|
|
|
|
|
|
+ 'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
|
|
|
'langx': 'zh-cn',
|
|
'langx': 'zh-cn',
|
|
|
'ltype': '3',
|
|
'ltype': '3',
|
|
|
'gtype': 'FT',
|
|
'gtype': 'FT',
|
|
@@ -72,8 +88,14 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
}
|
|
}
|
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
|
|
yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
|
|
|
meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
|
|
meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
|
|
|
|
|
+ else:
|
|
|
|
|
+ print('未获取到联赛id')
|
|
|
|
|
+ return
|
|
|
|
|
|
|
|
def parse_match(self, response):
|
|
def parse_match(self, response):
|
|
|
|
|
+ if response.status == 400:
|
|
|
|
|
+ print(response.status)
|
|
|
|
|
+ print('parse_match', response.url)
|
|
|
url = 'http://m.hgg070.com/app/member/get_game_more.php'
|
|
url = 'http://m.hgg070.com/app/member/get_game_more.php'
|
|
|
showtype = response.meta['showtype']
|
|
showtype = response.meta['showtype']
|
|
|
isp = response.meta['isp']
|
|
isp = response.meta['isp']
|
|
@@ -93,7 +115,7 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
if gids:
|
|
if gids:
|
|
|
for gid in gids:
|
|
for gid in gids:
|
|
|
from_data = {
|
|
from_data = {
|
|
|
- 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
|
|
|
|
|
|
|
+ 'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
|
|
|
'langx': 'zh-cn',
|
|
'langx': 'zh-cn',
|
|
|
'ltype': '3',
|
|
'ltype': '3',
|
|
|
'gtype': 'FT',
|
|
'gtype': 'FT',
|
|
@@ -106,5 +128,25 @@ class ZuqiuSpider(scrapy.Spider):
|
|
|
meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
|
|
meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
|
|
|
|
|
|
|
|
def parse_odds(self, response):
    """Parse one odds response into a single ``ZuqiuItem``.

    Reads the first ``<game>`` element under ``<serverresponse>``. When its
    ``<gopen>`` child is ``'Y'``, every direct child element of the game is
    copied into a ``{tag: text}`` dict, with missing text stored as ``""``.
    Otherwise the dict stays empty and an info message is logged. The dict
    is yielded as ``item['all']``.

    Args:
        response: The Scrapy response produced by the odds request.

    Yields:
        ZuqiuItem: One item whose ``'all'`` field holds the odds dict.
        Nothing is yielded when the response has no ``<game>`` element.
    """
    # Function-scope import: a bare ``import lxml`` does not load the
    # ``etree`` submodule, so bind it explicitly before use.
    from lxml import etree

    logger = logging.getLogger(__name__)

    if response.status == 400:
        logger.warning('parse_odds got HTTP %s for %s',
                       response.status, response.url)

    games = response.xpath('//serverresponse/game')
    if not games:
        # Indexing an empty selector list would raise IndexError.
        logger.warning('parse_odds: no <game> element in %s', response.url)
        return
    game = games[0]

    game_odds = {}
    # Relative path: an XPath starting with '//' is evaluated against the
    # whole document even when called on a node selector.
    gopen = game.xpath('./gopen/text()').extract_first()
    if gopen == 'Y':
        game_element = etree.fromstring(game.extract())
        # Iterating an element yields its direct children; this replaces
        # the deprecated getchildren().
        for child in game_element:
            game_odds[child.tag] = "" if child.text is None else child.text
    else:
        logger.info('gopen == "N", 详细赔率盘口未开启')

    # NOTE(review): the item is yielded even when the market is closed (with
    # an empty dict). That is the most plausible reading of the original,
    # whose indentation was not recoverable -- confirm against the pipeline.
    item = ZuqiuItem()
    item['all'] = game_odds
    yield item
|