# -*- coding: utf-8 -*-
"""Spider that collects sports betting list pages via pluggable source modules.

The active source name is read from the ``pycomm`` cache; the matching
``mcollect.<source>`` module supplies the request links, and its
``Resolver``/``Storage`` classes parse and persist each response.
"""
import json

import scrapy
import lxml.etree
import pycomm

from biz.zqleague import zqLeague
from collectSports.items import *


class SportslstSpider(scrapy.Spider):
    # Name of the currently configured collection source (set in start_requests).
    curSrc = None
    name = 'sportslst'
    allowed_domains = ['hg0088.com']

    # NOTE(review): only ITEM_PIPELINES (and USER_AGENT) are actual Scrapy
    # settings here; the HTTP header entries (Accept, Cookie, Host, ...) are
    # silently ignored when placed in custom_settings and likely belong in
    # DEFAULT_REQUEST_HEADERS -- confirm intent before relying on them.
    custom_settings = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
        "Host": "199.26.100.178",
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sportslst.SportslstPipeline": 200,
        },
    }

    def start_requests(self):
        """Yield one first-page FormRequest per link exposed by the source module."""
        self.curSrc = source = self.getCurrentSource()
        if not source:
            # BUG FIX: the original crashed with TypeError ('mcollect.' + None)
            # when no source was configured; fail loudly but cleanly instead.
            self.logger.error("no 'currentSource' configured in pycomm cache")
            return
        mc = __import__('mcollect.' + source)
        srcObj = getattr(mc, source)
        for item in srcObj.links:
            url = item['url'].format(uid=srcObj.uid, page=1)
            # 'cb' names the Resolver/Storage method handling this link.
            params = str(item['cb']) if item['cb'] else 'default'
            yield scrapy.FormRequest(
                url, callback=self.parse, meta={'cb': params, 'subdel': 0})

    def getCurrentSource(self):
        """Return the configured source name from the shared cache, or None."""
        conf = pycomm.getCache('conf')
        if 'currentSource' in conf:
            return conf['currentSource']
        return None

    def parse(self, response):
        """Resolve a response via the source's Resolver, fan out paging
        requests on the first page (subdel == 0), then yield the stored item.
        """
        cb = response.meta['cb']
        subdel = response.meta['subdel']
        mc = __import__('mcollect.' + self.curSrc + '.Resolver', fromlist=True)
        resolver = mc.Resolver()
        # Renamed from 're' (original) which shadowed the stdlib regex module.
        data = json.loads(getattr(resolver, cb)(response.body))
        self.logger.debug('parse subdel=%s url=%s', subdel, response.url)
        if subdel == 0 and 'total_page' in data:
            # BUG FIX: subStart_request is a generator; the original called it
            # without iterating, so follow-up page requests were never issued.
            for request in self.subStart_request(
                    data['total_page'], response.url, cb, 'page_no'):
                yield request
        mcs = __import__('mcollect.' + self.curSrc + '.Storage', fromlist=True)
        storage = mcs.Storage()
        yield getattr(storage, cb)(data)

    def subStart_request(self, total_page, url, cb, page_name='page'):
        """Yield FormRequests for pages total_page..2 by rewriting the page
        query parameter of *url* (which carries page 0 or 1).
        """
        params = str(cb) if cb else 'default'
        while total_page > 1:
            # BUG FIX: the original concatenated the int total_page directly
            # into the URL, raising TypeError; convert to str first.
            page = str(total_page)
            newurl = url.replace(page_name + '=0', page_name + '=' + page)
            newurl = newurl.replace(page_name + '=1', page_name + '=' + page)
            self.logger.debug('sub request url=%s', newurl)
            yield scrapy.FormRequest(
                newurl, callback=self.parse, meta={'cb': params, 'subdel': 1})
            total_page -= 1