# -*- coding: utf-8 -*-
import importlib
import json

import scrapy
import lxml.etree  # kept from the original import line; not used in this spider
import pycomm

from collectSports.items import *
# from mcollect.hg0088 import Resolver  # superseded by the dynamic import below
from biz.zqleague import zqLeague
class SportslstSpider(scrapy.Spider):
    name = 'sportslst'
    allowed_domains = ['hg0088.com']
    # start_urls = ['http://hg0088.com/']
    curSrc = None

    # Only real Scrapy settings belong at the top level of custom_settings.
    # The HTTP headers the original mixed in here would be silently ignored,
    # so they are grouped under DEFAULT_REQUEST_HEADERS, which Scrapy applies
    # to every request.
    custom_settings = {
        "DEFAULT_REQUEST_HEADERS": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
            "Host": "199.26.100.178",
        },
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sportslst.SportslstPipeline": 200,
        },
    }
    # start_url= 'http://199.26.100.178/app/member/get_game_allbets.php'

    def start_requests(self):
        self.curSrc = source = self.getCurrentSource()
        # Load the source package picked at runtime, e.g. mcollect.hg0088.
        # importlib.import_module replaces the original
        # getattr(__import__('mcollect.' + source), source) dance.
        srcObj = importlib.import_module('mcollect.' + source)
        # zl = zqLeague()
        # zl.update({'league_id': 1})
        for item in srcObj.links:
            url = item['url'].format(uid=srcObj.uid, page=1)
            # meta['cb'] names the Resolver/Storage method used in parse().
            cb = str(item['cb']) if item['cb'] else 'default'
            yield scrapy.FormRequest(url, callback=self.parse,
                                     meta={'cb': cb, 'subdel': 0})
    def getCurrentSource(self):
        # Read the configured source name from the shared cache.
        conf = pycomm.getCache('conf')
        if 'currentSource' in conf:
            return conf['currentSource']
        return None
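    # NOTE: pycomm.getCache is assumed (its implementation is not part of this
    # file) to return a dict-like config blob, e.g. {'currentSource': 'hg0088'}.
    # If 'currentSource' is missing, curSrc stays None and start_requests will
    # fail on the 'mcollect.' + source concatenation.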
    def parse(self, response):
        # The two conflicting branches of the original merge conflict were
        # identical apart from whitespace and debug prints; this is the
        # resolved version.
        cb = response.meta['cb']
        subdel = response.meta['subdel']
        # Decode the raw body with the source-specific Resolver method named
        # by cb. ('data' replaces the original variable name 're', which
        # shadowed the stdlib module name.)
        resolver_mod = importlib.import_module('mcollect.' + self.curSrc + '.Resolver')
        resolver = resolver_mod.Resolver()
        data = json.loads(getattr(resolver, cb)(response.body))
        # Only the seed request (subdel == 0) fans out pagination requests, so
        # follow-up pages never re-trigger pagination. subStart_request is a
        # generator, so it must be drained with yield from; the original
        # called it without consuming it, and no page requests were ever sent.
        if subdel == 0 and 'total_page' in data:
            yield from self.subStart_request(data['total_page'], response.url,
                                             cb, 'page_no')
        # Hand the decoded payload to the matching Storage method, which
        # builds the item consumed by SportslstPipeline.
        storage_mod = importlib.import_module('mcollect.' + self.curSrc + '.Storage')
        storage = storage_mod.Storage()
        yield getattr(storage, cb)(data)
    def subStart_request(self, total_page, url, cb, page_name='page'):
        # Generate follow-up requests for pages total_page .. 2 by rewriting
        # the page parameter embedded in the seed URL. total_page comes from
        # decoded JSON and may be an int, so it is cast to str before
        # concatenation (the original raised TypeError here).
        while total_page > 1:
            newurl = url.replace(page_name + '=0', page_name + '=' + str(total_page))
            newurl = newurl.replace(page_name + '=1', page_name + '=' + str(total_page))
            params = str(cb) if cb else 'default'
            yield scrapy.FormRequest(newurl, callback=self.parse,
                                     meta={'cb': params, 'subdel': 1})
            total_page -= 1
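
# ---------------------------------------------------------------------------
# The spider depends on a pluggable-source layout that is not part of this
# file. Below is a minimal sketch of the contract it assumes; the package
# name mcollect.hg0088, the URL template, and the 'allbets' method name are
# illustrative guesses inferred from the import_module/getattr calls above,
# not confirmed project code:
#
#   # mcollect/hg0088/__init__.py
#   uid = '21627573'
#   links = [
#       {'url': 'http://199.26.100.178/app/member/get_game_allbets.php?uid={uid}&page_no={page}',
#        'cb': 'allbets'},
#   ]
#
#   # mcollect/hg0088/Resolver.py
#   class Resolver(object):
#       def allbets(self, body):
#           """Parse the raw response body and return a JSON string;
#           include 'total_page' when the listing is paginated."""
#
#   # mcollect/hg0088/Storage.py
#   class Storage(object):
#       def allbets(self, data):
#           """Turn the decoded dict into a collectSports item for
#           SportslstPipeline."""
# ---------------------------------------------------------------------------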
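# Usage: from the root of the collectSports Scrapy project, run
#   scrapy crawl sportslst
# (standard Scrapy CLI; the spider registers itself via name = 'sportslst').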