roll_zuqiu.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. # -*- coding: utf-8 -*-
  2. import logging
  3. # import lxml
  4. import scrapy
  5. import xmltodict
  6. from ..items import ZuqiuItem
  7. class ZuqiuSpider(scrapy.Spider):
  8. name = 'roll_zuqiu'
  9. allowed_domains = ['m.hgg070.com']
  10. custom_settings = {
  11. "ITEM_PIPELINES": {
  12. "hgg070_spider.pipelines.roll_zuqiu.RollPipeline": 200,
  13. },
  14. # 'LOG_LEVEL': 'DEBUG',
  15. # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  16. }
  17. headers = {
  18. 'Accept': '*/*',
  19. 'Accept-Encoding': 'gzip, deflate',
  20. 'Accept-Language': 'zh-CN,zh;q=0.9',
  21. 'Content-Length': '130',
  22. 'Content-type': 'application/x-www-form-urlencoded',
  23. 'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
  24. 'Host': 'm.hgg070.com',
  25. 'Origin': 'http://m.hgg070.com',
  26. 'Proxy-Connection': 'keep-alive',
  27. 'Referer': 'http://m.hgg070.com/',
  28. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
  29. }
  30. def start_requests(self):
  31. url = "http://m.hgg070.com/app/member/get_league_list.php"
  32. from_data = {
  33. 'uid': '9dc21fa757cc4eb44a0cf29d49ec3706eb24be8507c02681f9b204014c48e6c3',
  34. 'langx': 'zh-cn',
  35. 'ltype': '3',
  36. 'gtype': 'FT',
  37. 'showtype': 'RB',
  38. 'sorttype': '',
  39. 'date': '',
  40. 'isP': ''
  41. }
  42. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers, dont_filter=True)
  43. def parse(self, response):
  44. leagues = response.xpath('//serverresponse/game/league')
  45. url = 'http://m.hgg070.com/app/member/get_game_list.php'
  46. if leagues:
  47. self.headers['Content-Length'] = '141'
  48. for league in leagues:
  49. lid = league.xpath('.//league_id/text()').extract_first()
  50. from_data = {
  51. 'uid': '9dc21fa757cc4eb44a0cf29d49ec3706eb24be8507c02681f9b204014c48e6c3',
  52. 'langx': 'zh-cn',
  53. 'ltype': '3',
  54. 'gtype': 'FT',
  55. 'showtype': 'RB',
  56. 'lid': lid,
  57. 'sorttype': '',
  58. 'date': '',
  59. 'isP': ''
  60. }
  61. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers, dont_filter=True)
  62. else:
  63. print('未获取到联赛id')
  64. return
  65. def parse_match(self, response):
  66. url = 'http://m.hgg070.com/app/member/get_game_more.php'
  67. self.headers['Content-Length'] = '132'
  68. gids = response.xpath('//serverresponse/game/gid/text()').extract()
  69. tags = response.xpath('//serverresponse/game/more_count/text()').extract()
  70. if gids:
  71. for i, gid in enumerate(gids):
  72. from_data = {
  73. 'uid': '9dc21fa757cc4eb44a0cf29d49ec3706eb24be8507c02681f9b204014c48e6c3',
  74. 'langx': 'zh-cn',
  75. 'ltype': '3',
  76. 'gtype': 'FT',
  77. 'showtype': 'RB',
  78. 'date': '',
  79. 'isP': '',
  80. 'gid': gid,
  81. }
  82. tag = tags[i]
  83. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
  84. meta={'tag': tag}, dont_filter=True)
  85. def parse_odds(self, response):
  86. logger = logging.getLogger(__name__)
  87. tag = response.meta['tag']
  88. game = xmltodict.parse(response.text)
  89. try:
  90. game_odds = game['serverresponse']['game'][0]
  91. except:
  92. game_odds = game['serverresponse']['game']
  93. if game_odds['gopen'] == 'Y':
  94. item = ZuqiuItem()
  95. item['data'] = game_odds
  96. item['tag'] = tag
  97. yield item
  98. else:
  99. logger.info('gopen == "N", 详细赔率盘口未开启')
  100. return