lq_sports.py

# -*- coding: utf-8 -*-
import copy
import json
import os
import re
import time

import lxml.etree
import scrapy

from ..items import LanqiuItem, Odds
from ..utils.helper import Helper
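
# The pieces imported from ..items and ..utils.helper are defined elsewhere in
# the project. A minimal sketch of what this spider assumes they provide
# (field names inferred from the usage below; the real definitions may differ):
#
#     class Odds(scrapy.Item):
#         match_id = scrapy.Field()
#         uuid = scrapy.Field()
#         source = scrapy.Field()
#         updata = scrapy.Field()
#         content = scrapy.Field()
#         gidm = scrapy.Field()
#         tag = scrapy.Field()
#         league = scrapy.Field()
#         match_uid = scrapy.Field()
#         datetime = scrapy.Field()
#         team_h = scrapy.Field()
#         team_c = scrapy.Field()
#
#     class Helper:
#         @staticmethod
#         def genearte_MD5(text):
#             import hashlib
#             return hashlib.md5(text.encode('utf-8')).hexdigest()
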
class LqSportsSpider(scrapy.Spider):
    """Crawls basketball (篮球) odds from m.hgg070.com."""
    name = 'lq_sports'
    allowed_domains = ['m.hgg070.com']
    start_urls = ['http://m.hgg070.com/']
    # Used to pick out basketball leagues by their Chinese name.
    remath = re.compile("篮球")
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
        },
    }
    def start_requests(self):
        # Fetch the league list for both views: 'FT' = today's games, 'FU' = early market.
        h_types = ['FT', 'FU']
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Content-Length': '130',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in h_types:
            data = {
                'uid': 'ab179dc88196ff82fbb13c259575332f01fbad2c52b465f5def15a4876c10410',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': ''
            }
            yield scrapy.FormRequest(url=url, formdata=data, callback=self.parse,
                                     headers=headers, meta={"data": data},
                                     dont_filter=True)
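
    # get_league_list.php responds with XML; parse() below relies on elements
    # shaped roughly like this (inferred from the XPath expressions used, not a
    # captured response):
    #
    #     <league>
    #         <league_id>123</league_id>
    #         <league_name>…篮球…</league_name>
    #     </league>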
    def parse(self, response):
        # Collect league ids and keep only basketball leagues, then request the
        # game list for each, for either today's or the early market.
        data = response.meta["data"]
        # Keep an unmodified copy of the form data for downstream requests.
        base_data = copy.deepcopy(data)
        leagues = response.xpath('//league')
        url = "http://m.hgg070.com/app/member/get_game_list.php"
        for le in leagues:
            name = le.xpath('./league_name/text()').extract_first()
            if name and self.remath.search(name):
                lid = le.xpath('./league_id/text()').extract_first()
                if data["showtype"] == "FT":
                    # Today's games
                    data['lid'], data['sorttype'], data['date'] = lid, 'league', ''
                elif data["showtype"] == "FU":
                    # Early market (all dates)
                    data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
                yield scrapy.FormRequest(url=url, formdata=data, callback=self.detailball,
                                         meta={"data": base_data}, dont_filter=True)
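
    # get_game_list.php also responds with XML; detailball() below only needs the
    # game ids, so the assumed shape is roughly (not a captured response):
    #
    #     <game>
    #         <gid>7654321</gid>
    #         ...
    #     </game>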
    def detailball(self, response):
        # Collect the id (gid) of every game in the league and request its full
        # odds detail.
        data = response.meta["data"]
        url = "http://m.hgg070.com/app/member/get_game_more.php"
        games = response.xpath("//game")
        for g in games:
            gid = g.xpath("./gid/text()").extract_first()
            data["gid"] = gid
            yield scrapy.FormRequest(url=url, formdata=data, callback=self.getItem,
                                     dont_filter=True)
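
    # getItem() below maps raw game fields onto market definitions read from
    # conf/hgg070.json. A rough sketch of the structure it assumes for the 'bk'
    # section, inferred from the keys accessed (the real file may differ):
    #
    #     {
    #         "bk": [
    #             {
    #                 "plodds": "<market label>",
    #                 "prodds": "<game field holding the Y/N switch>",
    #                 "items": [
    #                     {
    #                         "lodds": "<selection label>",
    #                         "rodds": "<game field holding the odds value>",
    #                         "ratio_name": "<game field holding the handicap/ratio>"
    #                     }
    #                 ]
    #             }
    #         ]
    #     }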
    def getItem(self, response):
        # Parse each <game> element into a dict of tag -> text, then map the raw
        # odds fields onto the market definitions from conf/hgg070.json and
        # yield one Odds item per game.
        game_lists = []
        games = response.xpath("//game")
        if games:
            uid_list = []
            for game in games:
                game_odds = {}
                gopen = game.xpath('./gopen/text()').extract_first()
                if gopen == 'Y':
                    game_el = lxml.etree.fromstring(game.extract())
                    for child in game_el.getchildren():
                        if child.text is None:
                            game_odds[child.tag] = ""
                        else:
                            game_odds[child.tag] = child.text.replace(' ', '')
                    game_lists.append(game_odds)
                else:
                    self.logger.info('gopen == N, detailed odds market is not open')
            if game_lists:
                # Load the basketball market definitions once; each game gets its
                # own deep copy because the entries are mutated below.
                cpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
                with open(os.path.join(cpath, "conf", "hgg070.json"), encoding='utf8') as hg:
                    hgg_conf = json.load(hg)['bk']
                for gl in game_lists:
                    hgg = copy.deepcopy(hgg_conf)
                    odd_list = []
                    # The raw datetime arrives as one string; insert a space between
                    # the date part and the trailing 8-character time part.
                    datetime = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
                    team_h = gl['team_h']
                    team_c = gl['team_c']
                    tag = 0
                    league_id = gl['gidm']
                    match_id = gl.get('gid', '')
                    match_uid = Helper.genearte_MD5(team_h + team_c + datetime)
                    for x in hgg:
                        # A market is enabled when its switch field (prodds) is 'Y'.
                        try:
                            enabled = gl[x['prodds']]
                            if enabled == 'Y':
                                x['enabled'] = 1
                                tag += 1
                            else:
                                x['enabled'] = 0
                        except KeyError:
                            enabled = ''
                            x['enabled'] = 0
                        new_items = []
                        for y in x['items']:
                            try:
                                y['oddsv'] = gl[y['rodds']]
                            except KeyError:
                                y['oddsv'] = 0
                            try:
                                y['ratio'] = gl[y['ratio_name']]
                            except KeyError:
                                y['ratio'] = ""
                            y['data'] = gl
                            # uid identifies this exact odds value; sole identifies the
                            # market slot independently of the current value.
                            uid = (str(x['plodds']) + str(y['lodds']) + str(y['rodds']) +
                                   str(y['ratio']) + str(y['ratio_name']) + str(y['oddsv']) +
                                   str(match_id) + str(league_id))
                            sl = (str(y['lodds']) + str(y['rodds']) + str(y['ratio']) +
                                  str(y['ratio_name']) + str(x['plodds']) + str(x['prodds']))
                            y['uid'] = Helper.genearte_MD5(uid)
                            y['sole'] = Helper.genearte_MD5(sl)
                            if enabled == 'Y':
                                # uid_list accumulates enabled odds across every game
                                # in this response.
                                uid_list.append(y['uid'])
                            new_items.append(y)
                        n_i = copy.deepcopy(x)
                        n_i['items'] = new_items
                        odd_list.append(n_i)
                    item = Odds()
                    item['match_id'] = match_id
                    item['uuid'] = uid_list
                    item['source'] = "hg0088"
                    item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    item['content'] = odd_list
                    item['gidm'] = league_id
                    item['tag'] = tag
                    item['league'] = gl["league"]
                    item['match_uid'] = match_uid
                    item['datetime'] = datetime
                    item['team_h'] = team_h
                    item['team_c'] = team_c
                    self.logger.debug('final item: %s', item)
                    yield item
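
# A typical way to run this spider from the Scrapy project root (assuming the
# standard project layout for hgg070_spider):
#
#     scrapy crawl lq_sports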