zuqiu.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. <<<<<<< HEAD
  4. from ..items import ZuqiuItem
  5. class ZuqiuSpider(scrapy.Spider):
  6. name = 'zuqiu'
  7. allowed_domains = ['m.hgg070.com/']
  8. headers = {
  9. 'Accept': '*/*',
  10. 'Accept-Encoding': 'gzip, deflate',
  11. 'Accept-Language': 'zh-CN,zh;q=0.9',
  12. 'Content-Length': '130',
  13. 'Content-type': 'application/x-www-form-urlencoded',
  14. 'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
  15. 'Host': 'm.hgg070.com',
  16. 'Origin': 'http://m.hgg070.com',
  17. 'Proxy-Connection': 'keep-alive',
  18. 'Referer': 'http://m.hgg070.com/',
  19. 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
  20. }
  21. custom_settings={
  22. "ITEM_PIPELINES": {
  23. "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
  24. },
  25. }
  26. #读取今日足球
  27. def start_requests(self):
  28. url="http://m.hgg070.com/app/member/get_league_list.php"
  29. from_data={
  30. 'uid': '7c70e73f576d42d9f6d9fb1fcaa08c47b04bb9279584caedfe65858afb26722d',
  31. 'langx': 'zh-cn',
  32. 'ltype': '3',
  33. 'gtype': 'FT',
  34. 'showtype': 'RB',
  35. 'sorttype': '',
  36. 'date': '',
  37. 'isP': ''
  38. }
  39. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,meta={'data': from_data}, dont_filter=True)
  40. #解析今日足球,获取所有的联赛,并请求联赛详情页
  41. def parse(self, response):
  42. url='http://m.hgg070.com/app/member/get_game_list.php'
  43. data=response.xpath("//league")
  44. from_data=response.meta['data']
  45. for item in data:
  46. lid = item.xpath('./league_id/text()').extract_first()
  47. from_data['lid'] = lid
  48. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.detailtedMsg,meta={'data': response.meta['data']},dont_filter=True)
  49. # 获取所有玩法
  50. def detailtedMsg(self,response):
  51. url = 'http://m.hgg070.com/app/member/get_game_more.php'
  52. data=response.xpath("//game")
  53. from_data=response.meta['data']
  54. for item in data:
  55. lid = item.xpath('./gid/text()').extract_first()
  56. from_data['gid'] = lid
  57. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.getitem,dont_filter=True)
  58. #获取更多
  59. def getitem(self,response):
  60. data=response.xpath("//game")
  61. for obj in data:
  62. item=ZuqiuItem()
  63. item.id=obj.xpath('./gid')
  64. item.team_h=obj.xpath('./team_h')
  65. item.team_c = obj.xpath('./team_c')
  66. item.ior_RTS2Y = obj.xpath('./ior_RTS2Y')
  67. item.ior_RTS2N = obj.xpath('./ior_RTS2N')
  68. item.ior_REH = obj.xpath('./ior_REH')
  69. item.ior_REC = obj.xpath('./ior_REC')
  70. item.ior_ROUC = obj.xpath('./ior_ROUC')
  71. item.ior_HROUH = obj.xpath('./ior_HROUH')
  72. item.ratio_rouo = obj.xpath('./ratio_rouo')
  73. item.ratio_rouu = obj.xpath('./ratio_rouu')
  74. yield item
  75. =======
  76. class ZuqiuSpider(scrapy.Spider):
  77. name = 'zuqiu'
  78. allowed_domains = ['m.hgg070.com']
  79. # 读取今日足球
  80. def start_requests(self):
  81. headers = {
  82. 'Accept': '*/*',
  83. 'Accept-Encoding': 'gzip, deflate',
  84. 'Accept-Language': 'zh-CN,zh;q=0.9',
  85. 'Content-Length': '130',
  86. 'Content-type': 'application/x-www-form-urlencoded',
  87. 'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
  88. 'Host': 'm.hgg070.com',
  89. 'Origin': 'http://m.hgg070.com',
  90. 'Proxy-Connection': 'keep-alive',
  91. 'Referer': 'http://m.hgg070.com/',
  92. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
  93. }
  94. url = "http://m.hgg070.com/app/member/get_league_list.php"
  95. h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', '', '131')]
  96. for h_type in h_types:
  97. show_type, isp, length = h_type
  98. headers['Content-Length'] = length
  99. from_data = {
  100. 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
  101. 'langx': 'zh-cn',
  102. 'ltype': '3',
  103. 'gtype': 'FT',
  104. 'showtype': show_type,
  105. 'sorttype': '',
  106. 'date': '',
  107. 'isP': isp
  108. }
  109. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=headers,
  110. meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
  111. def parse(self, response):
  112. leagues = response.xpath('//serverresponse/game/league')
  113. url = 'http://m.hgg070.com/app/member/get_game_list.php'
  114. headers = {
  115. 'Accept': '*/*',
  116. 'Accept-Encoding': 'gzip, deflate',
  117. 'Accept-Language': 'zh-CN,zh;q=0.9',
  118. 'Content-Length': '147',
  119. 'Content-type': 'application/x-www-form-urlencoded',
  120. 'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
  121. 'Host': 'm.hgg070.com',
  122. 'Origin': 'http://m.hgg070.com',
  123. 'Proxy-Connection': 'keep-alive',
  124. 'Referer': 'http://m.hgg070.com/',
  125. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
  126. }
  127. if leagues:
  128. showtype = response.meta['showtype']
  129. isp = response.meta['isp']
  130. if showtype == 'FT' and isp == '':
  131. date = ''
  132. headers['Content-Length'] = '147'
  133. elif showtype == 'FU' and isp == 'P':
  134. date = 'all'
  135. headers['Content-Length'] = '151'
  136. elif showtype == 'FU' and isp == '':
  137. date = 'all'
  138. headers['Content-Length'] = '150'
  139. else:
  140. date = 'all'
  141. headers['Content-Length'] = '151'
  142. for league in leagues:
  143. lid = league.xpath('.//league_id/text()').extract_first()
  144. from_data = {
  145. 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
  146. 'langx': 'zh-cn',
  147. 'ltype': '3',
  148. 'gtype': 'FT',
  149. 'showtype': showtype,
  150. # 'showtype': "FT",
  151. # 'lid': '103391',
  152. 'lid': lid,
  153. 'sorttype': 'league',
  154. 'date': date,
  155. 'isP': isp
  156. # 'date': "",
  157. # 'isP': ""
  158. }
  159. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=headers,
  160. meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
  161. def parse_match(self, response):
  162. print(response.text)
  163. pass
  164. >>>>>>> 10c979a4fcc4f2d36f17fa2ecd6de7bad38358f3