| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- # -*- coding: utf-8 -*-
- import scrapy,lxml.etree,pycomm,json
- from collectSports.items import *
- # from mcollect.hg0088 import Resolver
- from biz.zqleague import zqLeague
class SportslstSpider(scrapy.Spider):
    """Spider that collects sports listing pages from the currently configured
    collector source (e.g. hg0088) and hands results to SportslstPipeline.

    The active source name is read from the shared ``pycomm`` conf cache; its
    Resolver/Storage classes are imported dynamically from ``mcollect.<source>``.
    """

    # Name of the active collector source, cached in start_requests() so that
    # parse() can import the matching Resolver/Storage modules.
    curSrc = None
    name = 'sportslst'
    allowed_domains = ['hg0088.com']
    # start_urls = ['http://hg0088.com/']
    custom_settings = {
        # BUG FIX: raw HTTP headers are not Scrapy settings — placed directly in
        # custom_settings they were silently ignored. They belong under
        # DEFAULT_REQUEST_HEADERS so Scrapy actually sends them.
        "DEFAULT_REQUEST_HEADERS": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
            "Host": "199.26.100.178",
        },
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sportslst.SportslstPipeline": 200,
        },
    }
    # start_url = 'http://199.26.100.178/app/member/get_game_allbets.php'

    def start_requests(self):
        """Yield the page-1 request for every link declared by the source module."""
        self.curSrc = source = self.getCurrentSource()
        # Ask for the submodule explicitly via fromlist so getattr() is
        # guaranteed to find it even on a fresh interpreter.
        mc = __import__('mcollect.' + source, fromlist=[source])
        srcObj = getattr(mc, source)
        for item in srcObj.links:
            url = item['url'].format(uid=srcObj.uid, page=1)
            params = str(item['cb']) if item['cb'] else 'default'
            yield scrapy.FormRequest(
                url, callback=self.parse, meta={'cb': params, 'subdel': 0})

    def getCurrentSource(self):
        """Return the 'currentSource' name from the shared conf cache, or None."""
        conf = pycomm.getCache('conf')
        if 'currentSource' in conf:
            return conf['currentSource']
        return None

    def parse(self, response):
        """Resolve the raw page into JSON, fan out pagination, store the result."""
        cb = response.meta['cb']
        subdel = response.meta['subdel']
        # Resolver for the active source turns the raw body into a JSON string.
        mc = __import__('mcollect.' + self.curSrc + '.Resolver',
                        fromlist=['Resolver'])
        resolver = mc.Resolver()
        # Renamed from 're' to avoid shadowing the stdlib regex module name.
        data = json.loads(getattr(resolver, cb)(response.body))

        # Only the first (non-sub) page fans out the remaining pages.
        if subdel == 0 and 'total_page' in data:
            # BUG FIX: subStart_request is a generator — the original call
            # discarded it, so follow-up pages were never requested.
            yield from self.subStart_request(
                data['total_page'], response.url, cb, 'page_no')

        mcs = __import__('mcollect.' + self.curSrc + '.Storage',
                         fromlist=['Storage'])
        storage = mcs.Storage()
        yield getattr(storage, cb)(data)

    def subStart_request(self, total_page, url, cb, page_name='page'):
        """Yield FormRequests for pages total_page down to 2 by rewriting the
        page parameter embedded in *url* (which currently holds page 0 or 1)."""
        while total_page > 1:
            # BUG FIX: total_page is an int; the original concatenated it to a
            # str directly, raising TypeError on the first iteration.
            page_param = page_name + '=' + str(total_page)
            newurl = url.replace(page_name + '=0', page_param)
            newurl = newurl.replace(page_name + '=1', page_param)
            params = str(cb) if cb else 'default'
            request = scrapy.FormRequest(
                newurl, callback=self.parse, meta={'cb': params, 'subdel': 1})
            total_page = total_page - 1
            yield request
|