roll_wangqiu.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # -*- coding: utf-8 -*-
  2. import json
  3. import logging
  4. # import lxml
  5. import scrapy
  6. import xmltodict
  7. # from ..items import ZuqiuItem
  8. class WangqiuSpider(scrapy.Spider):
  9. name = 'roll_wangqiu'
  10. allowed_domains = ['m.hg0088.com']
  11. custom_settings = {
  12. "ITEM_PIPELINES": {
  13. "hgg070_spider.pipelines.roll_wangqiu.RollPipeline": 200,
  14. },
  15. # 'LOG_LEVEL': 'DEBUG',
  16. # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  17. }
  18. headers = {
  19. 'Host': 'm.hg0088.com',
  20. 'Connection': 'keep-alive',
  21. # 'Content-Length': '89', # hg0088注释
  22. 'Origin': 'https://m.hg0088.com',
  23. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
  24. 'Content-type': 'application/x-www-form-urlencoded',
  25. 'Accept': '*/*',
  26. 'Sec-Fetch-Site': 'same-origin',
  27. 'Sec-Fetch-Mode': 'cors',
  28. 'Referer': 'https://m.hg0088.com/',
  29. 'Accept-Encoding': 'gzip, deflate, br',
  30. 'Accept-Language': 'zh-CN,zh;q=0.9',
  31. 'Cookie': '_ga=GA1.2.219750064.1572659333; box4pwd_notshow=Y; _gid=GA1.2.2031225008.1572829846; _gat=1'
  32. }
  33. def start_requests(self):
  34. pass
  35. url = "https://m.hg0088.com/app/member/get_league_list.php"
  36. from_data = {
  37. 'uid': 'yv8vy3csm22383986l393491',
  38. 'langx': 'zh-cn',
  39. 'ltype': '4',
  40. 'gtype': 'TN',
  41. 'showtype': 'RB',
  42. 'sorttype': '',
  43. 'date': '',
  44. 'isP': ''
  45. }
  46. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers)
  47. def parse(self, response):
  48. print(111111111111)
  49. pass
  50. leagues = response.xpath('//serverresponse/game/league')
  51. url = 'http://m.hgg070.com/app/member/get_game_list.php'
  52. if leagues:
  53. for league in leagues:
  54. lid = league.xpath('.//league_id/text()').extract_first()
  55. from_data = {
  56. 'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
  57. 'langx': 'zh-cn',
  58. 'ltype': '4',
  59. 'gtype': 'TN',
  60. 'showtype': 'RB',
  61. 'lid': lid,
  62. 'sorttype': '',
  63. 'date': '',
  64. 'isP': ''
  65. }
  66. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers, dont_filter=True)
  67. else:
  68. print('未获取到联赛id')
  69. return
  70. def parse_match(self, response):
  71. pass
  72. url = 'http://m.hgg070.com/app/member/get_game_more.php'
  73. gids = response.xpath('//serverresponse/game/gid/text()').extract()
  74. tags = response.xpath('//serverresponse/game/more_count/text()').extract()
  75. if gids:
  76. for i, gid in enumerate(gids):
  77. from_data = {
  78. 'uid': '19fbb114b9503aaa806a1920203d73eb85db285f26188e36ae7172f550987364',
  79. 'langx': 'zh-cn',
  80. 'ltype': '4',
  81. 'gtype': 'TN',
  82. 'showtype': 'RB',
  83. 'date': '',
  84. 'isP': '',
  85. 'gid': gid,
  86. }
  87. tag = tags[i]
  88. yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
  89. meta={'tag': tag}, dont_filter=True)
  90. def parse_odds(self, response):
  91. print('111111111111122333334555')
  92. pass
  93. # logger = logging.getLogger(__name__)
  94. # index = response.meta['index']
  95. # tag = response.meta['tag']
  96. # game = xmltodict.parse(response.text)
  97. # try:
  98. # game_odds = game['serverresponse']['game'][0]
  99. # except:
  100. # game_odds = game['serverresponse']['game']
  101. # if game_odds['gopen'] == 'Y':
  102. # item = ZuqiuItem()
  103. # item['data'] = game_odds
  104. # item['index'] = index
  105. # item['tag'] = tag
  106. # yield item
  107. # else:
  108. # logger.info('gopen == "N", 详细赔率盘口未开启')
  109. # return