# zuqiu.py (4.5 KB)
# -*- coding: utf-8 -*-
import scrapy
  3. class ZuqiuSpider(scrapy.Spider):
  4. name = 'zuqiu'
  5. allowed_domains = ['m.hgg070.com']
  6. headers = {
  7. 'Accept': '*/*',
  8. 'Accept-Encoding': 'gzip, deflate',
  9. 'Accept-Language': 'zh-CN,zh;q=0.9',
  10. 'Content-Length': '130',
  11. 'Content-type': 'application/x-www-form-urlencoded',
  12. 'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
  13. 'Host': 'm.hgg070.com',
  14. 'Origin': 'http://m.hgg070.com',
  15. 'Proxy-Connection': 'keep-alive',
  16. 'Referer': 'http://m.hgg070.com/',
  17. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
  18. }
  19. # 读取今日足球
  20. def start_requests(self):
  21. url = "http://m.hgg070.com/app/member/get_league_list.php"
  22. h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', '', '131')]
  23. for h_type in h_types:
  24. show_type, isp, length = h_type
  25. self.headers['Content-Length'] = length
  26. from_data = {
  27. 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
  28. 'langx': 'zh-cn',
  29. 'ltype': '3',
  30. 'gtype': 'FT',
  31. 'showtype': show_type,
  32. 'sorttype': '',
  33. 'date': '',
  34. 'isP': isp
  35. }
  36. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
  37. meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
  38. def parse(self, response):
  39. leagues = response.xpath('//serverresponse/game/league')
  40. url = 'http://m.hgg070.com/app/member/get_game_list.php'
  41. if leagues:
  42. showtype = response.meta['showtype']
  43. isp = response.meta['isp']
  44. if showtype == 'FT' and isp == '':
  45. date = ''
  46. self.headers['Content-Length'] = '147'
  47. elif showtype == 'FU' and isp == 'P':
  48. date = 'all'
  49. self.headers['Content-Length'] = '151'
  50. elif showtype == 'FU' and isp == '':
  51. date = 'all'
  52. self.headers['Content-Length'] = '150'
  53. else:
  54. date = 'all'
  55. self.headers['Content-Length'] = '151'
  56. for league in leagues:
  57. lid = league.xpath('.//league_id/text()').extract_first()
  58. from_data = {
  59. 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
  60. 'langx': 'zh-cn',
  61. 'ltype': '3',
  62. 'gtype': 'FT',
  63. 'showtype': showtype,
  64. 'lid': lid,
  65. 'sorttype': 'league',
  66. 'date': date,
  67. 'isP': isp
  68. }
  69. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
  70. meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
  71. def parse_match(self, response):
  72. url = 'http://m.hgg070.com/app/member/get_game_more.php'
  73. showtype = response.meta['showtype']
  74. isp = response.meta['isp']
  75. if showtype == 'FT' and isp == '':
  76. date = ''
  77. self.headers['Content-Length'] = '132'
  78. elif showtype == 'FU' and isp == 'P':
  79. date = 'all'
  80. self.headers['Content-Length'] = '136'
  81. elif showtype == 'FU' and isp == '':
  82. date = ''
  83. self.headers['Content-Length'] = '132'
  84. else:
  85. date = 'all'
  86. self.headers['Content-Length'] = '136'
  87. gids = response.xpath('//serverresponse/game/gid/text()').extract()
  88. if gids:
  89. for gid in gids:
  90. from_data = {
  91. 'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
  92. 'langx': 'zh-cn',
  93. 'ltype': '3',
  94. 'gtype': 'FT',
  95. 'showtype': showtype,
  96. 'date': date,
  97. 'isP': isp,
  98. 'gid': gid,
  99. }
  100. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
  101. meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
  102. def parse_odds(self, response):
  103. print(response.text)
  104. pass