# -*- coding: utf-8 -*-
"""Scrapy spider that walks league list -> game list -> per-game odds."""
import logging

import lxml
import lxml.etree
import scrapy

from items import ZuqiuItem


class ZuqiuSpider(scrapy.Spider):
    """Crawl football odds from m.hgg070.com.

    The crawl has three POST stages, each driven by the response of the
    previous one:

    1. ``start_requests`` -> ``get_league_list.php`` (one request per variant)
    2. ``parse``          -> ``get_game_list.php``   (one request per league)
    3. ``parse_match``    -> ``get_game_more.php``   (one request per game id)

    ``parse_odds`` finally turns the first ``<game>`` element of the last
    response into a ``ZuqiuItem``.
    """

    name = 'zuqiu'
    allowed_domains = ['m.hgg070.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            # "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }
    # Default headers sent with every request.  'Content-Length' here is only
    # a placeholder; the real value is computed per request in _form_request.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Length': '130',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }
    # NOTE(review): hard-coded 'uid' form value sent with every request;
    # looks like a session token - consider loading it from settings.
    uid = 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729'

    league_list_url = 'http://m.hgg070.com/app/member/get_league_list.php'
    game_list_url = 'http://m.hgg070.com/app/member/get_game_list.php'
    game_more_url = 'http://m.hgg070.com/app/member/get_game_more.php'

    # index (position in start_requests) -> (date, showtype, isP) used for the
    # get_game_more.php request.  Any index not listed uses _MATCH_DEFAULT.
    _MATCH_PARAMS = {
        0: ('', 'FT', ''),
        1: ('all', 'FT', 'P'),
        2: ('', 'FU', ''),
    }
    _MATCH_DEFAULT = ('all', 'FU', 'P')

    def _form_request(self, url, formdata, callback, meta):
        """Build a POST ``FormRequest`` with a Content-Length matching its body.

        The length is derived from the encoded body instead of being
        hard-coded, so ids of any length produce a correct header.  The
        override is applied to the request's own headers, leaving the shared
        class-level ``headers`` dict untouched.
        """
        request = scrapy.FormRequest(
            url=url,
            formdata=formdata,
            callback=callback,
            headers=self.headers,
            meta=meta,
            dont_filter=True,
        )
        request.headers['Content-Length'] = str(len(request.body))
        return request

    def start_requests(self):
        """Yield one league-list request per (showtype, isP) variant."""
        # NOTE(review): entries 1 and 3 are identical here, while
        # _MATCH_PARAMS treats index 1 as FT/P - confirm which is intended.
        h_types = [('FT', ''), ('FU', 'P'), ('FU', ''), ('FU', 'P')]
        for i, (show_type, isp) in enumerate(h_types):
            from_data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': show_type,
                'sorttype': '',
                'date': '',
                'isP': isp,
            }
            yield self._form_request(
                self.league_list_url,
                from_data,
                self.parse,
                {'showtype': show_type, 'isp': isp, 'index': i},
            )

    def parse(self, response):
        """Yield one game-list request for every league in *response*."""
        leagues = response.xpath('//serverresponse/game/league')
        if not leagues:
            self.logger.warning('未获取到联赛id')
            return

        showtype = response.meta['showtype']
        isp = response.meta['isp']
        index = response.meta['index']
        # Only the first variant is queried without a date filter.
        date = '' if index == 0 else 'all'

        for league in leagues:
            lid = league.xpath('.//league_id/text()').extract_first()
            if lid is None:
                # A None value cannot be form-encoded; skip this league.
                self.logger.warning('league without league_id in %s', response.url)
                continue
            from_data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': showtype,
                'lid': lid,
                'sorttype': 'league',
                'date': date,
                'isP': isp,
            }
            yield self._form_request(
                self.game_list_url,
                from_data,
                self.parse_match,
                {'showtype': showtype, 'isp': isp, 'index': index},
            )

    def parse_match(self, response):
        """Yield one odds request for every game id in *response*."""
        index = response.meta['index']
        if response.status == 400:
            self.logger.warning('parse_match %s %s', response.status, response.url)

        # NOTE(review): showtype/isP are re-derived from index rather than
        # taken from response.meta, and 'date' for index 2 differs from the
        # value used in parse() - confirm this is intentional.
        date, showtype, isp = self._MATCH_PARAMS.get(index, self._MATCH_DEFAULT)

        gids = response.xpath('//serverresponse/game/gid/text()').extract()
        for gid in gids:
            from_data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': showtype,
                'date': date,
                'isP': isp,
                'gid': gid,
            }
            yield self._form_request(
                self.game_more_url,
                from_data,
                self.parse_odds,
                {'showtype': showtype, 'isp': isp},
            )

    def parse_odds(self, response):
        """Yield a ``ZuqiuItem`` built from the first ``<game>`` element.

        ``item['all']`` maps each child tag of the game element to its text
        ("" when the element has no text).  When ``gopen`` is not ``'Y'`` the
        mapping is left empty.
        """
        if response.status == 400:
            self.logger.warning('parse_odds %s %s', response.status, response.url)

        games = response.xpath('//serverresponse/game')
        if not games or not games[0]:
            # Previously an empty result raised IndexError on games[0].
            self.logger.warning('parse_odds: no game element in %s', response.url)
            return
        game = games[0]

        game_odds = {}
        # Relative path: read gopen of *this* game, not the first in the document.
        gopen = game.xpath('./gopen/text()').extract_first()
        if gopen == 'Y':
            element = lxml.etree.fromstring(game.extract())
            game_odds = {child.tag: child.text or "" for child in element}

        # NOTE(review): an item with an empty 'all' is still emitted when the
        # detailed odds are not open (gopen != 'Y'), as before.
        item = ZuqiuItem()
        item['all'] = game_odds
        yield item