zuqiu.py

# -*- coding: utf-8 -*-
import logging

import lxml.etree
import scrapy

from ..items import ZuqiuItem


class ZuqiuSpider(scrapy.Spider):
    name = 'zuqiu'
    allowed_domains = ['m.hgg070.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }
    # Static headers sent with every POST; Content-Length is overwritten before
    # each request to match the form body being sent.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Length': '130',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }
    def start_requests(self):
        # One league-list request per (showtype, isP) combination; each tuple is
        # (showtype, isP, Content-Length header value for that form body).
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        h_types = [('FT', '', '130'), ('FT', 'P', '131'), ('FU', '', '130'), ('FU', 'P', '131')]
        for h_type in h_types:
            show_type, isp, length = h_type
            self.headers['Content-Length'] = length
            form_data = {
                # Hard-coded session uid reused for every request.
                'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': show_type,
                'sorttype': '',
                'date': '',
                'isP': isp
            }
            yield scrapy.FormRequest(url=url, formdata=form_data, callback=self.parse, headers=self.headers,
                                     meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
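    # The league-list endpoint answers with XML. The shape sketched below is an
    # assumption inferred from the XPaths in parse() below, shown only to make
    # the selectors easier to follow (the real tag set is not documented here):
    #
    #   <serverresponse>
    #       <game>
    #           <league>
    #               <league_id>82</league_id>
    #               ...
    #           </league>
    #           ...
    #       </game>
    #   </serverresponse>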
    def parse(self, response):
        # League list: extract each league id and request that league's game list.
        if response.status == 400:
            print(response.status)
        print('parse', response.url)
        leagues = response.xpath('//serverresponse/game/league')
        url = 'http://m.hgg070.com/app/member/get_game_list.php'
        if leagues:
            showtype = response.meta['showtype']
            isp = response.meta['isp']
            if showtype == 'FT' and isp == '':
                date = ''
                self.headers['Content-Length'] = '147'
            elif showtype == 'FU' and isp == 'P':
                date = 'all'
                self.headers['Content-Length'] = '151'
            elif showtype == 'FU' and isp == '':
                date = 'all'
                self.headers['Content-Length'] = '150'
            else:
                date = 'all'
                self.headers['Content-Length'] = '151'
            for league in leagues:
                lid = league.xpath('.//league_id/text()').extract_first()
                form_data = {
                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
                    'langx': 'zh-cn',
                    'ltype': '3',
                    'gtype': 'FT',
                    'showtype': showtype,
                    'lid': lid,
                    'sorttype': 'league',
                    'date': date,
                    'isP': isp
                }
                yield scrapy.FormRequest(url=url, formdata=form_data, callback=self.parse_match, headers=self.headers,
                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
        else:
            print('no league ids in the response')
            return
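    # get_game_list.php is assumed to answer in the same XML style; parse_match()
    # below only reads the match ids, so any layout matched by
    # //serverresponse/game/gid works, e.g. (illustrative only):
    #
    #   <serverresponse>
    #       <game>
    #           <gid>1954677</gid>
    #           <gid>1954678</gid>
    #       </game>
    #   </serverresponse>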
    def parse_match(self, response):
        # Game list for one league: request detailed odds for every match id.
        if response.status == 400:
            print(response.status)
        print('parse_match', response.url)
        url = 'http://m.hgg070.com/app/member/get_game_more.php'
        showtype = response.meta['showtype']
        isp = response.meta['isp']
        if showtype == 'FT' and isp == '':
            date = ''
            self.headers['Content-Length'] = '132'
        elif showtype == 'FU' and isp == 'P':
            date = 'all'
            self.headers['Content-Length'] = '136'
        elif showtype == 'FU' and isp == '':
            date = ''
            self.headers['Content-Length'] = '132'
        else:
            date = 'all'
            self.headers['Content-Length'] = '136'
        gids = response.xpath('//serverresponse/game/gid/text()').extract()
        if gids:
            for gid in gids:
                form_data = {
                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
                    'langx': 'zh-cn',
                    'ltype': '3',
                    'gtype': 'FT',
                    'showtype': showtype,
                    'date': date,
                    'isP': isp,
                    'gid': gid,
                }
                yield scrapy.FormRequest(url=url, formdata=form_data, callback=self.parse_odds, headers=self.headers,
                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
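    # get_game_more.php returns the full odds detail for a single match.
    # parse_odds() below treats every child tag of <game> as one odds field, so
    # the exact tag names do not matter; an illustrative (assumed) fragment:
    #
    #   <serverresponse>
    #       <game>
    #           <gopen>Y</gopen>
    #           <!-- every other child element becomes one key in game_odds -->
    #       </game>
    #   </serverresponse>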
    def parse_odds(self, response):
        # Detailed odds for one match: flatten every child of <game> into a dict.
        # print(response.text)
        # game_lists = []
        if response.status == 400:
            print(response.status)
        print('parse_odds', response.url)
        logger = logging.getLogger(__name__)
        games = response.xpath('//serverresponse/game')
        if games:
            game = games[0]
            game_odds = {}
            gopen = game.xpath('./gopen/text()').extract_first()
            if gopen == 'Y':
                node = lxml.etree.fromstring(game.extract())
                for child in node:
                    if child.text is None:
                        game_odds[child.tag] = ""
                    else:
                        game_odds[child.tag] = child.text
            else:
                logger.info('gopen == "N", detailed odds market not open')
            item = ZuqiuItem()
            item['all'] = game_odds
            yield item
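
# ZuqiuItem is imported from ..items and the pipeline is referenced as
# hgg070_spider.pipelines.zuqiu.ZuqiuPipeline in custom_settings; neither is
# part of this file. Minimal sketches consistent with how parse_odds() uses
# them might look like this (assumptions, not the project's actual definitions):
#
#   class ZuqiuItem(scrapy.Item):
#       all = scrapy.Field()
#
#   class ZuqiuPipeline:
#       def process_item(self, item, spider):
#           # persist item['all'] somewhere
#           return item


if __name__ == '__main__':
    # Normally the spider is launched with `scrapy crawl zuqiu` from inside the
    # hgg070_spider project. The block below is only a sketch of a standalone
    # entry point; it assumes the module is run as part of the package (e.g.
    # `python -m hgg070_spider.spiders.zuqiu`) so that the relative import of
    # ZuqiuItem above still resolves and the project settings are on the path.
    from scrapy.crawler import CrawlerProcess
    from scrapy.utils.project import get_project_settings

    process = CrawlerProcess(get_project_settings())
    process.crawl(ZuqiuSpider)
    process.start()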