# zuqiu.py
  1. # -*- coding: utf-8 -*-
  2. import logging
  3. # import lxml
  4. import scrapy
  5. import xmltodict
  6. from ..items import ZuqiuItem
  7. class ZuqiuSpider(scrapy.Spider):
  8. name = 'zuqiu'
  9. allowed_domains = ['m.hgg070.com']
  10. custom_settings = {
  11. "ITEM_PIPELINES": {
  12. "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
  13. },
  14. # 'LOG_LEVEL': 'DEBUG',
  15. # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  16. }
  17. headers = {
  18. 'Accept': '*/*',
  19. 'Accept-Encoding': 'gzip, deflate',
  20. 'Accept-Language': 'zh-CN,zh;q=0.9',
  21. 'Content-Length': '130',
  22. 'Content-type': 'application/x-www-form-urlencoded',
  23. 'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
  24. 'Host': 'm.hgg070.com',
  25. 'Origin': 'http://m.hgg070.com',
  26. 'Proxy-Connection': 'keep-alive',
  27. 'Referer': 'http://m.hgg070.com/',
  28. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
  29. }
  30. def start_requests(self):
  31. url = "http://m.hgg070.com/app/member/get_league_list.php"
  32. h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
  33. for i, h_type in enumerate(h_types):
  34. show_type, isp, length = h_type
  35. self.headers['Content-Length'] = length
  36. from_data = {
  37. 'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
  38. 'langx': 'zh-cn',
  39. 'ltype': '3',
  40. 'gtype': 'FT',
  41. 'showtype': show_type,
  42. 'sorttype': '',
  43. 'date': '',
  44. 'isP': isp
  45. }
  46. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
  47. meta={'index': i}, dont_filter=True)
  48. def parse(self, response):
  49. leagues = response.xpath('//serverresponse/game/league')
  50. url = 'http://m.hgg070.com/app/member/get_game_list.php'
  51. if leagues:
  52. index = response.meta['index']
  53. if index == 0:
  54. date = ''
  55. showtype = 'FT'
  56. isp = ''
  57. self.headers['Content-Length'] = '147'
  58. elif index == 2:
  59. date = 'all'
  60. showtype = 'FU'
  61. isp = ''
  62. self.headers['Content-Length'] = '150'
  63. else:
  64. date = 'all'
  65. showtype = 'FU'
  66. isp = 'P'
  67. self.headers['Content-Length'] = '151'
  68. for league in leagues:
  69. lid = league.xpath('.//league_id/text()').extract_first()
  70. from_data = {
  71. 'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
  72. 'langx': 'zh-cn',
  73. 'ltype': '3',
  74. 'gtype': 'FT',
  75. 'showtype': showtype,
  76. 'lid': lid,
  77. 'sorttype': 'league',
  78. 'date': date,
  79. 'isP': isp
  80. }
  81. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
  82. meta={'index': index}, dont_filter=True)
  83. else:
  84. print('未获取到联赛id')
  85. return
  86. def parse_match(self, response):
  87. index = response.meta['index']
  88. url = 'http://m.hgg070.com/app/member/get_game_more.php'
  89. if index == 0:
  90. date = ''
  91. showtype = 'FT'
  92. isp = ''
  93. self.headers['Content-Length'] = '132'
  94. elif index == 1:
  95. date = 'all'
  96. showtype = 'FT'
  97. isp = 'P'
  98. self.headers['Content-Length'] = '136'
  99. elif index == 2:
  100. date = ''
  101. showtype = 'FU'
  102. isp = ''
  103. self.headers['Content-Length'] = '132'
  104. else:
  105. date = 'all'
  106. showtype = 'FU'
  107. isp = 'P'
  108. self.headers['Content-Length'] = '136'
  109. gids = response.xpath('//serverresponse/game/gid/text()').extract()
  110. tags = response.xpath('//serverresponse/game/more_count/text()').extract()
  111. if gids:
  112. for i, gid in enumerate(gids):
  113. from_data = {
  114. 'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
  115. 'langx': 'zh-cn',
  116. 'ltype': '3',
  117. 'gtype': 'FT',
  118. 'showtype': showtype,
  119. 'date': date,
  120. 'isP': isp,
  121. 'gid': gid,
  122. }
  123. tag = tags[i]
  124. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
  125. meta={'index': index, 'tag': tag}, dont_filter=True)
  126. def parse_odds(self, response):
  127. logger = logging.getLogger(__name__)
  128. index = response.meta['index']
  129. tag = response.meta['tag']
  130. game = xmltodict.parse(response.text)
  131. try:
  132. game_odds = game['serverresponse']['game'][0]
  133. except:
  134. game_odds = game['serverresponse']['game']
  135. if game_odds['gopen'] == 'Y':
  136. item = ZuqiuItem()
  137. item['data'] = game_odds
  138. item['index'] = index
  139. item['tag'] = tag
  140. yield item
  141. else:
  142. logger.info('gopen == "N", 详细赔率盘口未开启')
  143. return