zuqiu.py

# -*- coding: utf-8 -*-
import logging

import lxml.etree
import scrapy

from items import ZuqiuItem  # project item definition (items.py must be on the import path)


class ZuqiuSpider(scrapy.Spider):
    name = 'zuqiu'
    allowed_domains = ['m.hgg070.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            # "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }
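    # The item pipeline above is commented out.  As an illustration only, a
    # minimal hgg070_spider/pipelines/zuqiu.py could look like the sketch
    # below; the body is an assumption (only the class path comes from the
    # setting above), while process_item is the standard Scrapy pipeline hook:
    #
    #     class ZuqiuPipeline:
    #         def process_item(self, item, spider):
    #             # item['all'] holds the flat tag -> text mapping built in parse_odds
    #             return item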
    # Static request headers; Content-Length is overwritten before every request
    # because the upstream API appears to expect an exact value per payload.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Length': '130',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }

    def start_requests(self):
        # One league-list request per (showtype, isP) combination; the third
        # field is the Content-Length the API expects for that payload.
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        # Note: the second entry is ('FT', 'P') so that the list lines up with
        # the index -> (showtype, isP) mapping hard-coded in parse_match.
        h_types = [('FT', '', '130'), ('FT', 'P', '131'), ('FU', '', '130'), ('FU', 'P', '131')]
        for i, h_type in enumerate(h_types):
            show_type, isp, length = h_type
            self.headers['Content-Length'] = length
            form_data = {
                'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': show_type,
                'sorttype': '',
                'date': '',
                'isP': isp
            }
            yield scrapy.FormRequest(url=url, formdata=form_data, callback=self.parse, headers=self.headers,
                                     meta={'showtype': show_type, 'isp': isp, 'index': i}, dont_filter=True)

    def parse(self, response):
        # Parse the league-list XML and request the game list for every league id.
        leagues = response.xpath('//serverresponse/game/league')
        url = 'http://m.hgg070.com/app/member/get_game_list.php'
        if leagues:
            showtype = response.meta['showtype']
            isp = response.meta['isp']
            index = response.meta['index']
            # date and Content-Length differ per request variant (see start_requests).
            if index == 0:
                date = ''
                self.headers['Content-Length'] = '147'
            elif index == 2:
                date = 'all'
                self.headers['Content-Length'] = '150'
            else:
                date = 'all'
                self.headers['Content-Length'] = '151'
            for league in leagues:
                lid = league.xpath('.//league_id/text()').extract_first()
                form_data = {
                    'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
                    'langx': 'zh-cn',
                    'ltype': '3',
                    'gtype': 'FT',
                    'showtype': showtype,
                    'lid': lid,
                    'sorttype': 'league',
                    'date': date,
                    'isP': isp
                }
                yield scrapy.FormRequest(url=url, formdata=form_data, callback=self.parse_match, headers=self.headers,
                                         meta={'showtype': showtype, 'isp': isp, 'index': index}, dont_filter=True)
        else:
            print('No league ids returned')
            return

    def parse_match(self, response):
        # Parse the game list for one league and request detailed odds for every gid.
        index = response.meta['index']
        if response.status == 400:
            print(response.status)
            print('parse_match', response.url)
        url = 'http://m.hgg070.com/app/member/get_game_more.php'
        # Recover date / showtype / isP from the request index set in start_requests.
        if index == 0:
            date = ''
            showtype = 'FT'
            isp = ''
            self.headers['Content-Length'] = '132'
        elif index == 1:
            date = 'all'
            showtype = 'FT'
            isp = 'P'
            self.headers['Content-Length'] = '136'
        elif index == 2:
            date = ''
            showtype = 'FU'
            isp = ''
            self.headers['Content-Length'] = '132'
        else:
            date = 'all'
            showtype = 'FU'
            isp = 'P'
            self.headers['Content-Length'] = '136'
        gids = response.xpath('//serverresponse/game/gid/text()').extract()
        if gids:
            for gid in gids:
                form_data = {
                    'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
                    'langx': 'zh-cn',
                    'ltype': '3',
                    'gtype': 'FT',
                    'showtype': showtype,
                    'date': date,
                    'isP': isp,
                    'gid': gid,
                }
                yield scrapy.FormRequest(url=url, formdata=form_data, callback=self.parse_odds, headers=self.headers,
                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)

    def parse_odds(self, response):
        # Parse the detailed odds XML for one game and yield it as a single item.
        logger = logging.getLogger(__name__)
        if response.status == 400:
            print(response.status)
            print('parse_odds', response.url)
        games = response.xpath('//serverresponse/game')
        if games:
            game = games[0]
            game_odds = {}
            gopen = game.xpath('.//gopen/text()').extract_first()
            if gopen == 'Y':
                # The odds are flat child elements of <game>; copy every tag/text pair.
                game_xml = lxml.etree.fromstring(game.extract())
                for child in game_xml:
                    game_odds[child.tag] = child.text if child.text is not None else ''
            else:
                # gopen == 'N': the detailed odds market is not open for this game.
                # logger.info('gopen == "N", detailed odds market not open')
                pass
            item = ZuqiuItem()
            item['all'] = game_odds
            yield item
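
The spider imports ZuqiuItem and only ever fills a single 'all' field, so a minimal items.py compatible with parse_odds could look like the sketch below. This is an assumption for illustration; the real item class may define more fields. With the project laid out as a normal Scrapy project, the spider is started with the standard command scrapy crawl zuqiu.

    # items.py -- minimal sketch of the item the spider fills (assumed, not from the source)
    import scrapy


    class ZuqiuItem(scrapy.Item):
        # parse_odds stores the whole tag -> text mapping of one <game> element here
        all = scrapy.Field()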