# -*- coding: utf-8 -*-
import copy
import json
import os
import re
import time

import lxml.etree
import scrapy

from ..items import LanqiuItem
from ..items import Odds
from ..utils.helper import Helper


class LqSportsSpider(scrapy.Spider):
    """Crawl basketball odds from m.hgg070.com and yield ``Odds`` items.

    The crawl is a chain of three form POSTs:

    1. ``get_league_list.php`` -> :meth:`parse` keeps the leagues whose name
       matches :attr:`remath`.
    2. ``get_game_list.php``   -> :meth:`detailball` collects each game's gid.
    3. ``get_game_more.php``   -> :meth:`getItem` builds one ``Odds`` item per
       open game, using the market layout stored in ``conf/hgg070.json``.
    """

    name = 'lq_sports'
    # Domain entries must be bare host names (no trailing slash).
    allowed_domains = ['m.hgg070.com']
    start_urls = ['http://m.hgg070.com//']
    # A league is crawled only when its name contains this text.
    remath = re.compile("篮球")
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
        },
    }

    # Value of the ``date`` form field sent to the game list endpoint for
    # each ``showtype``: 'FT' (today) sends '', 'FU' (early market) sends 'all'.
    _DATE_BY_SHOWTYPE = {"FT": "", "FU": "all"}

    def start_requests(self):
        """Yield one league-list request per show type.

        'FT' is today's games and 'FU' is the early market.
        """
        h_types = ('FT', 'FU')
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            # NOTE(review): 130 is the url-encoded size of the form built
            # below; it must be kept in sync if the payload ever changes.
            'Content-Length': '130',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in h_types:
            data = {
                # NOTE(review): hard-coded uid value; presumably a session
                # token that should come from settings - confirm.
                'uid': 'ab179dc88196ff82fbb13c259575332f01fbad2c52b465f5def15a4876c10410',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': ''
            }
            yield scrapy.FormRequest(
                url=url,
                formdata=data,
                callback=self.parse,
                headers=headers,
                meta={"data": data},
                dont_filter=True,
            )

    def parse(self, response):
        """Request the game list of every league whose name matches ``remath``.

        The form posted to ``get_game_list.php`` is the league-list form plus
        ``lid``, ``sorttype='league'`` and a ``date`` chosen by show type.
        The form handed on to :meth:`detailball` through ``meta`` is the
        league-list form *without* those three changes.
        """
        data = response.meta["data"]
        # Snapshot of the unmodified form; this is what detailball receives.
        base_form = copy.deepcopy(data)
        url = "http://m.hgg070.com/app/member/get_game_list.php"
        date_value = self._DATE_BY_SHOWTYPE.get(data["showtype"], '')

        for league in response.xpath('//league'):
            league_name = league.xpath('./league_name/text()').extract_first()
            # A missing name cannot match; skip it instead of passing None
            # to the regex.
            if not league_name or not self.remath.search(league_name):
                continue
            lid = league.xpath('./league_id/text()').extract_first()
            if lid is None:
                # Without an id there is nothing to ask the game list for.
                self.logger.warning("league %r has no league_id, skipped", league_name)
                continue
            # Fresh dict per request so the dict stored in meta is never mutated.
            form = dict(data, lid=lid, sorttype='league', date=date_value)
            yield scrapy.FormRequest(
                url=url,
                formdata=form,
                callback=self.detailball,
                meta={"data": base_form},
                dont_filter=True,
            )

    def detailball(self, response):
        """Request the detailed odds of every ``<game>`` that carries a gid."""
        data = response.meta["data"]
        url = "http://m.hgg070.com/app/member/get_game_more.php"
        for game in response.xpath("//game"):
            gid = game.xpath("./gid/text()").extract_first()
            if gid is None:
                # A game without gid cannot be requested; keep going with the rest.
                continue
            # Copy instead of writing gid into the dict shared through meta.
            yield scrapy.FormRequest(
                url=url,
                formdata=dict(data, gid=gid),
                callback=self.getItem,
                dont_filter=True,
            )

    def getItem(self, response):
        """Yield one ``Odds`` item for each open ``<game>`` in the response.

        A game is open when its own ``<gopen>`` child is ``'Y'``. For every
        open game the market layout under the ``'bk'`` key of
        ``conf/hgg070.json`` is filled with the game's values and each
        selection gets two MD5 ids (``uid`` and ``sole``).
        """
        open_games = self._collect_open_games(response)
        if not open_games:
            return

        # Read the file once per response; it is decoded again for every game
        # so that each game works on its own independent set of dicts.
        config_text = self._read_market_config()

        # NOTE(review): this list is shared by every item yielded from the
        # same response, so it accumulates uids across games - confirm that
        # the pipeline expects this.
        uid_list = []

        for gl in open_games:
            markets = json.loads(config_text)['bk']
            # Whitespace was stripped from every field, so put the space back
            # in front of the trailing 8 characters of the datetime field.
            match_time = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
            team_h = gl['team_h']
            team_c = gl['team_c']
            league_id = gl['gidm']
            match_id = gl.get('gid', '')
            match_uid = Helper.genearte_MD5(team_h + team_c + match_time)

            tag = 0  # number of markets whose switch field is 'Y'
            odd_list = []
            for market in markets:
                enabled = self._lookup(gl, market, 'prodds', '')
                is_enabled = enabled == 'Y'
                market['enabled'] = 1 if is_enabled else 0
                if is_enabled:
                    tag += 1

                priced_items = []
                for selection in market['items']:
                    selection['oddsv'] = self._lookup(gl, selection, 'rodds', 0)
                    selection['ratio'] = self._lookup(gl, selection, 'ratio_name', "")
                    selection['data'] = gl

                    uid_source = "".join(str(part) for part in (
                        market['plodds'], selection['lodds'], selection['rodds'],
                        selection['ratio'], selection['ratio_name'],
                        selection['oddsv'], match_id, league_id,
                    ))
                    sole_source = "".join(str(part) for part in (
                        selection['lodds'], selection['rodds'], selection['ratio'],
                        selection['ratio_name'], market['plodds'], market['prodds'],
                    ))
                    selection['uid'] = Helper.genearte_MD5(uid_source)
                    selection['sole'] = Helper.genearte_MD5(sole_source)

                    if is_enabled:
                        uid_list.append(selection['uid'])
                    priced_items.append(selection)

                # The market dicts are private to this game, so a shallow copy
                # is enough to produce an independent entry.
                entry = dict(market)
                entry['items'] = priced_items
                odd_list.append(entry)

            item = Odds()
            item['match_id'] = match_id
            item['uuid'] = uid_list
            item['source'] = "hg0088"
            item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            item['content'] = odd_list
            item['gidm'] = league_id
            item['tag'] = tag
            item['league'] = gl["league"]
            item['match_uid'] = match_uid
            item['datetime'] = match_time
            item['team_h'] = team_h
            item['team_c'] = team_c
            self.logger.debug('最后####################################################### %s', item)
            yield item

    def _collect_open_games(self, response):
        """Return one ``{tag: text}`` dict per ``<game>`` whose gopen is 'Y'.

        Empty elements map to ``""`` and spaces are removed from every value.
        """
        games = []
        for game in response.xpath("//game"):
            # Relative path: an absolute '//game/...' here would always read
            # the first game of the document, not the one being iterated.
            if game.xpath('./gopen/text()').extract_first() != 'Y':
                self.logger.info('gopen == N, 详细赔率盘口未开启')
                continue
            node = lxml.etree.fromstring(game.extract())
            games.append({
                child.tag: "" if child.text is None else child.text.replace(' ', '')
                for child in node
            })
        return games

    @staticmethod
    def _read_market_config():
        """Return the raw text of ``conf/hgg070.json`` next to this package."""
        package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        config_path = os.path.join(package_dir, "conf", "hgg070.json")
        with open(config_path, encoding='utf8') as config_file:
            return config_file.read()

    @staticmethod
    def _lookup(game, spec, key_name, default):
        """Return ``game[spec[key_name]]``, or ``default`` when it is absent."""
        try:
            return game[spec[key_name]]
        except (KeyError, TypeError):
            return default