# -*- coding: utf-8 -*-
import json
import logging

# import lxml
import scrapy
import xmltodict

# from ..items import ZuqiuItem


class WangqiuSpider(scrapy.Spider):
    """Three-stage crawl: league list -> game list per league -> per-game detail.

    Every request is a form POST with ``gtype='TN'`` and ``showtype='RB'``
    (presumably tennis / in-play markets, judging by the spider name -- confirm
    against the site's API).  The final callback, ``parse_odds``, is currently
    a stub: its item-building logic is commented out.
    """

    name = 'roll_wangqiu'
    allowed_domains = ['m.hg0088.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.roll_wangqiu.RollPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }
    # NOTE(review): Host/Origin/Referer name m.hg0088.com, but parse() and
    # parse_match() post to m.hgg070.com with these same headers -- confirm
    # that the mismatch is intended.
    headers = {
        'Host': 'm.hg0088.com',
        'Connection': 'keep-alive',
        # 'Content-Length': '89',  # commented out for hg0088
        'Origin': 'https://m.hg0088.com',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        'Content-type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Referer': 'https://m.hg0088.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': '_ga=GA1.2.219750064.1572659333; box4pwd_notshow=Y; _gid=GA1.2.2031225008.1572829846; _gat=1'
    }

    def start_requests(self):
        """Yield the initial POST that asks for the league list."""
        url = "https://m.hg0088.com/app/member/get_league_list.php"
        # NOTE(review): the uid is hard-coded and differs from the one used by
        # the later callbacks; presumably a session token -- verify it is valid.
        from_data = {
            'uid': 'yv8vy3csm22383986l393491',
            'langx': 'zh-cn',
            'ltype': '4',
            'gtype': 'TN',
            'showtype': 'RB',
            'sorttype': '',
            'date': '',
            'isP': ''
        }
        yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse,
                                 headers=self.headers)

    def parse(self, response):
        """Read league ids from the league-list XML and request each game list.

        Yields one ``FormRequest`` per ``<league>`` node that carries a
        ``league_id``; yields nothing (and logs a warning) when the response
        contains no leagues.
        """
        self.logger.debug('parse: league list response received')
        leagues = response.xpath('//serverresponse/game/league')
        if not leagues:
            self.logger.warning('未获取到联赛id')
            return

        url = 'http://m.hgg070.com/app/member/get_game_list.php'
        for league in leagues:
            lid = league.xpath('.//league_id/text()').extract_first()
            if not lid:
                # extract_first() returns None when the node is missing, and a
                # None form value cannot be encoded into the request body.
                self.logger.warning('league node without league_id, skipped')
                continue
            from_data = {
                'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
                'langx': 'zh-cn',
                'ltype': '4',
                'gtype': 'TN',
                'showtype': 'RB',
                'lid': lid,
                'sorttype': '',
                'date': '',
                'isP': ''
            }
            # dont_filter=True keeps these repeated POSTs to the same URL from
            # being dropped as duplicates.
            yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match,
                                     headers=self.headers, dont_filter=True)

    def parse_match(self, response):
        """Request the detail page of every game listed in the response.

        ``gid`` and ``more_count`` are read from the same ``<game>`` node so
        the two always belong together; ``more_count`` is forwarded to
        ``parse_odds`` as ``meta['tag']`` and is ``None`` when absent.
        """
        url = 'http://m.hgg070.com/app/member/get_game_more.php'
        for game in response.xpath('//serverresponse/game'):
            gid = game.xpath('./gid/text()').extract_first()
            if not gid:
                # Nothing to request for a game node without an id.
                continue
            tag = game.xpath('./more_count/text()').extract_first()
            from_data = {
                'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
                'langx': 'zh-cn',
                'ltype': '4',
                'gtype': 'TN',
                'showtype': 'RB',
                'date': '',
                'isP': '',
                'gid': gid,
            }
            yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds,
                                     headers=self.headers, meta={'tag': tag}, dont_filter=True)

    def parse_odds(self, response):
        """Stub callback for the per-game detail response; produces no items yet.

        The intended implementation is kept below, commented out.  Note that
        it reads ``response.meta['index']``, which ``parse_match`` does not
        set.
        """
        self.logger.debug('parse_odds: detail response received')
        # logger = logging.getLogger(__name__)
        # index = response.meta['index']
        # tag = response.meta['tag']
        # game = xmltodict.parse(response.text)
        # try:
        #     game_odds = game['serverresponse']['game'][0]
        # except:
        #     game_odds = game['serverresponse']['game']
        # if game_odds['gopen'] == 'Y':
        #     item = ZuqiuItem()
        #     item['data'] = game_odds
        #     item['index'] = index
        #     item['tag'] = tag
        #     yield item
        # else:
        #     logger.info('gopen == "N", 详细赔率盘口未开启')
        #     return