# -*- coding: utf-8 -*-
import importlib
import json

import lxml.etree
import scrapy

import pycomm
from collectSports.items import *
# from mcollect.hg0088 import Resolver


class SportslstSpider(scrapy.Spider):
    """Collect football leagues and matches from the currently configured source.

    The source name is read from the ``conf`` cache entry (key
    ``currentSource``).  The module ``mcollect.<source>`` supplies the
    ``links`` to request and the ``uid`` substituted into each URL, and
    ``mcollect.<source>.Resolver`` supplies the ``Resolver`` class whose
    methods turn a raw response body into a JSON document.
    """

    # Name of the source module in use; set by start_requests().
    curSrc = None
    name = 'sportslst'
    allowed_domains = ['hg0088.com']
    # start_urls = ['http://hg0088.com/']
    # NOTE(review): the header-like keys below ("Accept", "Cookie", "Host",
    # ...) are not Scrapy setting names, so they are presumably not sent with
    # requests as written.  If they are meant to be request headers they
    # probably belong under DEFAULT_REQUEST_HEADERS -- confirm before moving.
    custom_settings = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
        "Host": "199.26.100.178",
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sportslst.SportslstPipeline": 200,
        }
    }
    # start_url= 'http://199.26.100.178/app/member/get_game_allbets.php'

    def start_requests(self):
        """Yield one request per link declared by the current source module.

        Each request carries, in ``meta['cb']``, the name of the Resolver
        method that parse() must use for its response ('default' when the
        link does not name one).

        Raises:
            ValueError: if no current source is configured in the cache.
        """
        self.curSrc = source = self.getCurrentSource()
        if not source:
            # Fail with an explicit message instead of the TypeError that
            # concatenating None to the module path would produce.
            raise ValueError("no 'currentSource' configured in the 'conf' cache")

        src_module = importlib.import_module('mcollect.' + source)
        for link in src_module.links:
            url = link['url'].format(uid=src_module.uid, page=1)
            resolver_method = str(link['cb'] or 'default')
            yield scrapy.FormRequest(
                url,
                callback=self.parse,
                meta={'cb': resolver_method},
            )

    def getCurrentSource(self):
        """Return the configured source name, or None when it is not set."""
        conf = pycomm.getCache('conf')
        # Guard against a missing/empty cache entry as well as a missing key.
        if conf and 'currentSource' in conf:
            return conf['currentSource']
        return None

    def parse(self, response):
        """Resolve a response into one CollectsportsItem of leagues and matches.

        The response body is passed to the Resolver method named in
        ``response.meta['cb']``; its result is decoded as JSON and every
        entry of its ``data`` list produces one league item plus two match
        items (the full-time match and its half-time counterpart).
        """
        resolver_module = importlib.import_module(
            'mcollect.' + self.curSrc + '.Resolver'
        )
        resolver = resolver_module.Resolver()
        resolve = getattr(resolver, response.meta['cb'])
        payload = json.loads(resolve(response.body))
        # print(payload['total_page'])

        # Two distinct lists: a chained assignment to a single [] would make
        # both item fields share one list and mix leagues with matches.
        leagues = []
        matches = []
        for record in payload['data']:
            league = ZqLeagueItem()
            league['league_id'] = record['league_id']
            league['league_name'] = record['league_name']
            leagues.append(league)

            # A fresh item per entry: appending one object twice and mutating
            # it in between would leave two identical half-time records.
            matches.append(self._build_match(record, is_half=False))
            matches.append(self._build_match(record, is_half=True))

        data = CollectsportsItem()
        data['zq_league'] = leagues
        data['zq_match'] = matches
        yield data

    @staticmethod
    def _build_match(record, is_half):
        """Build a new ZqMatch from one entry of the resolved ``data`` list.

        For the half-time variant the record's ``half_match_id`` becomes the
        ``match_id`` and ``half_match_id`` is set to 0.
        """
        match = ZqMatch()
        if is_half:
            match['match_id'] = record['half_match_id']
            match['half_match_id'] = 0
            match['is_half'] = 1
        else:
            match['match_id'] = record['match_id']
            match['half_match_id'] = record['half_match_id']
            match['is_half'] = 0
        match['host_id'] = record['host_id']
        match['guest_id'] = record['guest_id']
        match['host_name'] = record['host_name']
        match['guest_name'] = record['guest_name']
        match['league_id'] = record['league_id']
        match['is_roll'] = record['is_roll']  # whether it is a rolling-ball (in-play) match
        match['mdate'] = record['mdate']
        match['mtime'] = record['mtime']
        match['is_corner'] = record['is_corner']
        return match