# -*- coding: utf-8 -*-
"""Scrapy spider that collects football data from m.hgg070.com.

This module is the resolution of a merge conflict between two versions of
``ZuqiuSpider``:

* one version crawled showtype ``RB`` all the way down to ``ZuqiuItem``
  objects (league list -> game list -> "game more" details);
* the other crawled showtypes ``FT`` and ``FU`` down to the game list and
  only dumped the response.

Both flows are kept. Where the two versions disagreed on shared constants
(session uid, cookie, User-Agent) the HEAD values are used.
"""
import scrapy

from ..items import ZuqiuItem

LEAGUE_LIST_URL = 'http://m.hgg070.com/app/member/get_league_list.php'
GAME_LIST_URL = 'http://m.hgg070.com/app/member/get_game_list.php'
GAME_MORE_URL = 'http://m.hgg070.com/app/member/get_game_more.php'

# One row per league-list query:
#   (showtype, isP, `date` sent to the game list, `sorttype` sent to the game list)
# NOTE(review): the incoming branch listed ('FU', '') twice, differing only in
# a hard-coded Content-Length; the duplicate is dropped here. Confirm whether
# a fourth, different mode was intended.
CRAWL_MODES = (
    ('FT', '', '', 'league'),
    ('FU', 'P', 'all', 'league'),
    ('FU', '', 'all', 'league'),
    ('RB', '', '', ''),
)

# (item field, XML tag under <game>) pairs copied into each ZuqiuItem.
# NOTE(review): assumes ZuqiuItem declares every one of these fields; an
# undeclared field raises KeyError on assignment.
ITEM_FIELDS = (
    ('id', 'gid'),
    ('team_h', 'team_h'),
    ('team_c', 'team_c'),
    ('ior_RTS2Y', 'ior_RTS2Y'),
    ('ior_RTS2N', 'ior_RTS2N'),
    ('ior_REH', 'ior_REH'),
    ('ior_REC', 'ior_REC'),
    ('ior_ROUC', 'ior_ROUC'),
    ('ior_HROUH', 'ior_HROUH'),
    ('ratio_rouo', 'ratio_rouo'),
    ('ratio_rouu', 'ratio_rouu'),
)


class ZuqiuSpider(scrapy.Spider):
    """Crawl leagues, then games, then per-game details for football."""

    name = 'zuqiu'
    # Domain only: a trailing slash is not a valid allowed_domains entry.
    allowed_domains = ['m.hgg070.com']

    custom_settings = {
        "ITEM_PIPELINES": {
            "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
        },
    }

    # Session token sent with every form. Kept in one place so it can be
    # replaced easily when it stops working.
    uid = '7c70e73f576d42d9f6d9fb1fcaa08c47b04bb9279584caedfe65858afb26722d'

    # Request headers shared by every request. Content-Length is deliberately
    # absent: it depends on the encoded form body, so it is left to the HTTP
    # client instead of being hard-coded per request type.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
    }

    def _build_form(self, showtype, isp, sorttype='', date='', **extra):
        """Return a fresh form dict for one request.

        A new dict is built on every call so that requests never share (and
        later mutate) the same form data.
        """
        form = {
            'uid': self.uid,
            'langx': 'zh-cn',
            'ltype': '3',
            'gtype': 'FT',
            'showtype': showtype,
            'sorttype': sorttype,
            'date': date,
            'isP': isp,
        }
        form.update(extra)
        return form

    def start_requests(self):
        """Request the league list once for every entry in CRAWL_MODES."""
        for showtype, isp, date, sorttype in CRAWL_MODES:
            yield scrapy.FormRequest(
                url=LEAGUE_LIST_URL,
                formdata=self._build_form(showtype, isp),
                callback=self.parse,
                headers=self.headers,
                # The game-list parameters travel with the request so that
                # parse() does not have to re-derive them.
                meta={
                    'showtype': showtype,
                    'isp': isp,
                    'date': date,
                    'sorttype': sorttype,
                },
                dont_filter=True,
            )

    def parse(self, response):
        """Parse a league list and request the game list of every league.

        ``RB`` responses continue to ``detailtedMsg`` (and end up as items);
        all other showtypes continue to ``parse_match``.
        """
        meta = response.meta
        showtype = meta['showtype']
        isp = meta['isp']
        if showtype == 'RB':
            callback = self.detailtedMsg
        else:
            callback = self.parse_match

        for league in response.xpath('//league'):
            lid = league.xpath('.//league_id/text()').extract_first()
            if not lid:
                # A None value cannot be form-encoded; skip malformed entries.
                continue
            form = self._build_form(
                showtype, isp,
                sorttype=meta['sorttype'],
                date=meta['date'],
                lid=lid,
            )
            yield scrapy.FormRequest(
                url=GAME_LIST_URL,
                formdata=form,
                callback=callback,
                headers=self.headers,
                # Pass a copy so the callback owns its own form data.
                meta={'showtype': showtype, 'isp': isp, 'form': dict(form)},
                dont_filter=True,
            )

    def detailtedMsg(self, response):
        """Parse a game list and request the detail page of every game."""
        league_form = response.meta['form']
        for game in response.xpath('//game'):
            gid = game.xpath('./gid/text()').extract_first()
            if not gid:
                continue
            yield scrapy.FormRequest(
                url=GAME_MORE_URL,
                # dict(..., gid=gid) copies, leaving the league form untouched.
                formdata=dict(league_form, gid=gid),
                callback=self.getitem,
                headers=self.headers,
                dont_filter=True,
            )

    def getitem(self, response):
        """Yield one ZuqiuItem per <game> element of a detail response.

        Each field holds the text of the matching child element, or None
        when the element is missing.
        """
        for game in response.xpath('//game'):
            item = ZuqiuItem()
            for field, tag in ITEM_FIELDS:
                # Scrapy items take values by key; attribute assignment
                # raises AttributeError.
                item[field] = game.xpath('./{}/text()'.format(tag)).extract_first()
            yield item

    def parse_match(self, response):
        """Log the raw game-list response for non-RB showtypes.

        TODO: extract matches for these showtypes; the branch this came from
        only dumped the response body.
        """
        self.logger.debug(response.text)