# zuqiu.py (3.1 KB)
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from ..items import ZuqiuItem
  4. class ZuqiuSpider(scrapy.Spider):
  5. name = 'zuqiu'
  6. allowed_domains = ['m.hgg070.com/']
  7. headers = {
  8. 'Accept': '*/*',
  9. 'Accept-Encoding': 'gzip, deflate',
  10. 'Accept-Language': 'zh-CN,zh;q=0.9',
  11. 'Content-Length': '130',
  12. 'Content-type': 'application/x-www-form-urlencoded',
  13. 'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
  14. 'Host': 'm.hgg070.com',
  15. 'Origin': 'http://m.hgg070.com',
  16. 'Proxy-Connection': 'keep-alive',
  17. 'Referer': 'http://m.hgg070.com/',
  18. 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
  19. }
  20. custom_settings={
  21. "ITEM_PIPELINES": {
  22. "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
  23. },
  24. }
  25. #读取今日足球
  26. def start_requests(self):
  27. url="http://m.hgg070.com/app/member/get_league_list.php"
  28. from_data={
  29. 'uid': '7c70e73f576d42d9f6d9fb1fcaa08c47b04bb9279584caedfe65858afb26722d',
  30. 'langx': 'zh-cn',
  31. 'ltype': '3',
  32. 'gtype': 'FT',
  33. 'showtype': 'RB',
  34. 'sorttype': '',
  35. 'date': '',
  36. 'isP': ''
  37. }
  38. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,meta={'data': from_data}, dont_filter=True)
  39. #解析今日足球,获取所有的联赛,并请求联赛详情页
  40. def parse(self, response):
  41. url='http://m.hgg070.com/app/member/get_game_list.php'
  42. data=response.xpath("//league")
  43. from_data=response.meta['data']
  44. for item in data:
  45. lid = item.xpath('./league_id/text()').extract_first()
  46. from_data['lid'] = lid
  47. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.detailtedMsg,meta={'data': response.meta['data']},dont_filter=True)
  48. # 获取所有玩法
  49. def detailtedMsg(self,response):
  50. url = 'http://m.hgg070.com/app/member/get_game_more.php'
  51. data=response.xpath("//game")
  52. from_data=response.meta['data']
  53. for item in data:
  54. lid = item.xpath('./gid/text()').extract_first()
  55. from_data['gid'] = lid
  56. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.getitem,dont_filter=True)
  57. #获取更多
  58. def getitem(self,response):
  59. data=response.xpath("//game")
  60. for obj in data:
  61. item=ZuqiuItem()
  62. item.id=obj.xpath('./gid')
  63. item.team_h=obj.xpath('./team_h')
  64. item.team_c = obj.xpath('./team_c')
  65. item.ior_RTS2Y = obj.xpath('./ior_RTS2Y')
  66. item.ior_RTS2N = obj.xpath('./ior_RTS2N')
  67. item.ior_REH = obj.xpath('./ior_REH')
  68. item.ior_REC = obj.xpath('./ior_REC')
  69. item.ior_ROUC = obj.xpath('./ior_ROUC')
  70. item.ior_HROUH = obj.xpath('./ior_HROUH')
  71. item.ratio_rouo = obj.xpath('./ratio_rouo')
  72. item.ratio_rouu = obj.xpath('./ratio_rouu')
  73. yield item