sportslst.py 3.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. # -*- coding: utf-8 -*-
import importlib
import json

import lxml.etree
import pycomm
import scrapy

from collectSports.items import *
  4. # from mcollect.hg0088 import Resolver
  5. class SportslstSpider(scrapy.Spider):
  6. curSrc=None
  7. name = 'sportslst'
  8. allowed_domains = ['hg0088.com']
  9. # start_urls = ['http://hg0088.com/']
  10. custom_settings = {
  11. "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
  12. "Accept-Encoding":"gzip, deflate",
  13. "Accept-Language":"zh-CN,zh;q=0.8",
  14. "Cache-Control":"max-age=0",
  15. "Connection":"keep-alive",
  16. "Cookie":"OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
  17. "Host":"199.26.100.178",
  18. "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
  19. "ITEM_PIPELINES": {
  20. "collectSports.pipelines.sportslst.SportslstPipeline": 200,
  21. }
  22. }
  23. # start_url= 'http://199.26.100.178/app/member/get_game_allbets.php'
  24. def start_requests(self):
  25. self.curSrc=source=self.getCurrentSource()
  26. mc=__import__('mcollect.'+source)
  27. srcObj=getattr(mc,source)
  28. for item in srcObj.links:
  29. url=item['url'].format(uid=srcObj.uid,page=1)
  30. if not item['cb']:
  31. params='default'
  32. else:
  33. params=str(item['cb'])
  34. request = scrapy.FormRequest(url, callback=self.parse,meta={'cb':params})
  35. yield request
  36. def getCurrentSource(self):
  37. conf=pycomm.getCache('conf')
  38. if 'currentSource' in conf:
  39. return conf['currentSource']
  40. return
  41. def parse(self, response):
  42. cb=response.meta['cb']
  43. mc=__import__('mcollect.'+self.curSrc+'.Resolver',fromlist=True)
  44. res=mc.Resolver()
  45. cbk=getattr(res,cb)
  46. re=cbk(response.body)
  47. re=json.loads(re)
  48. # print(re['total_page'])
  49. data=CollectsportsItem()
  50. data['zq_league']=data['zq_match']=[]
  51. for index in re['data']:
  52. lg=ZqLeagueItem()
  53. lg['league_id']=index['league_id']
  54. lg['league_name']=index['league_name']
  55. mc=ZqMatch()
  56. mc['match_id']=index['match_id']
  57. mc['half_match_id']=index['half_match_id']
  58. mc['host_id']=index['host_id']
  59. mc['guest_id']=index['guest_id']
  60. mc['host_name']=index['host_name']
  61. mc['guest_name']=index['guest_name']
  62. mc['is_half']=0
  63. mc['league_id']=index['league_id']
  64. mc['is_roll']=index['is_roll'] #是否滚球
  65. mc['mdate']=index['mdate']
  66. mc['mtime']=index['mtime']
  67. mc['is_corner']=index['is_corner']
  68. data['zq_match'].append(mc)
  69. mc['is_half']=1
  70. mc['match_id']=index['half_match_id']
  71. mc['half_match_id']=0
  72. data['zq_match'].append(mc)
  73. data['zq_league'].append(lg)
  74. yield data