sportslst.py

# -*- coding: utf-8 -*-
import json

import scrapy
import lxml.etree
import pycomm

from collectSports.items import *
# from mcollect.hg0088 import Resolver
from biz.zqleague import zqLeague

class SportslstSpider(scrapy.Spider):
    curSrc = None
    name = 'sportslst'
    # Note: the Host header below points at a raw IP; if the link URLs use
    # that IP too, OffsiteMiddleware will filter them, since only
    # hg0088.com is allowed here.
    allowed_domains = ['hg0088.com']
    # start_urls = ['http://hg0088.com/']
    custom_settings = {
        # Plain header names are not Scrapy settings; they must be nested
        # under DEFAULT_REQUEST_HEADERS or Scrapy silently ignores them.
        "DEFAULT_REQUEST_HEADERS": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
            "Host": "199.26.100.178",
        },
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sportslst.SportslstPipeline": 200,
        },
    }
    # start_url = 'http://199.26.100.178/app/member/get_game_allbets.php'

    def start_requests(self):
        # Resolve the active source name (e.g. 'hg0088') and import the
        # matching mcollect.<source> module at runtime.
        self.curSrc = source = self.getCurrentSource()
        mc = __import__('mcollect.' + source)
        srcObj = getattr(mc, source)
        # zl = zqLeague()
        # zl.update({'league_id': 1})
        for item in srcObj.links:
            url = item['url'].format(uid=srcObj.uid, page=1)
            # Fall back to the Resolver's 'default' method when no callback
            # name is configured for this link.
            params = str(item['cb']) if item['cb'] else 'default'
            yield scrapy.FormRequest(url, callback=self.parse,
                                     meta={'cb': params, 'subdel': 0})
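
    # The mcollect.<source> module is not part of this file; the sketch below
    # shows the shape start_requests relies on, inferred from the attribute
    # accesses above. All concrete values and the query-string layout are
    # hypothetical, not the real mcollect.hg0088 contents:
    #
    #   uid = '21627573'
    #   links = [
    #       {'url': 'http://199.26.100.178/app/member/'
    #               'get_game_allbets.php?uid={uid}&page_no={page}',
    #        'cb': 'allBets'},  # name of a Resolver/Storage method
    #   ]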

    def getCurrentSource(self):
        # The current source name lives in a shared cache entry; returns
        # None when no source has been configured.
        conf = pycomm.getCache('conf')
        if 'currentSource' in conf:
            return conf['currentSource']
        return None
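
    # pycomm.getCache is an external helper; assuming it returns a plain
    # dict, a minimal cache entry selecting the hg0088 source would look
    # like {'currentSource': 'hg0088'}. How that entry gets written is not
    # shown in this file.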

    def parse(self, response):
        cb = response.meta['cb']
        subdel = response.meta['subdel']
        # Load <source>.Resolver dynamically and call the method named by
        # `cb` to turn the raw response body into a JSON string.
        mc = __import__('mcollect.' + self.curSrc + '.Resolver', fromlist=['Resolver'])
        res = mc.Resolver()
        cbk = getattr(res, cb)
        data = json.loads(cbk(response.body))
        if subdel == 0 and 'total_page' in data:
            # Only the first page (subdel == 0) fans out follow-up requests.
            # subStart_request is a generator, so its requests must be
            # re-yielded here; calling it without iterating would schedule
            # nothing.
            for request in self.subStart_request(data['total_page'], response.url, cb, 'page_no'):
                yield request
        # <source>.Storage mirrors the Resolver: the method with the same
        # name maps the parsed JSON onto pipeline items.
        mcs = __import__('mcollect.' + self.curSrc + '.Storage', fromlist=['Storage'])
        ress = mcs.Storage()
        cbks = getattr(ress, cb)
        yield cbks(data)
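
    # Neither Resolver nor Storage is defined here; the pair below is a
    # minimal sketch of the contract parse() relies on. The method name
    # 'allBets' and the item fields are hypothetical:
    #
    #   class Resolver:
    #       def allBets(self, body):
    #           # extract the payload from the raw page and return it as a
    #           # JSON string; multi-page listings carry a 'total_page' field
    #           return json.dumps({'total_page': 3, 'rows': [...]})
    #
    #   class Storage:
    #       def allBets(self, data):
    #           # map the parsed dict onto an item for SportslstPipeline
    #           return SportslstItem(rows=data['rows'])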

    def subStart_request(self, total_page, url, cb, page_name='page'):
        # Request the remaining pages, counting down from total_page to 2;
        # page 1 was already fetched by start_requests.
        while total_page > 1:
            # str() is required here: total_page is an int, and concatenating
            # it into the query string directly would raise a TypeError.
            newurl = url.replace(page_name + '=0', page_name + '=' + str(total_page))
            newurl = newurl.replace(page_name + '=1', page_name + '=' + str(total_page))
            self.logger.debug(newurl)
            params = str(cb) if cb else 'default'
            request = scrapy.FormRequest(newurl, callback=self.parse,
                                         meta={'cb': params, 'subdel': 1})
            total_page = total_page - 1
            yield request
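
# A worked example of the URL rewrite above, assuming the hypothetical
# first-page URL sketched in start_requests: with total_page=3 and
# page_name='page_no',
#
#   .../get_game_allbets.php?uid=21627573&page_no=1
#
# is re-requested as ...&page_no=3 and then ...&page_no=2, one request per
# loop iteration. The spider is run the standard Scrapy way:
#
#   scrapy crawl sportslst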