lq_sports.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from ..items import LanqiuItem
  4. import copy
  5. import lxml.etree
  6. import re,os,json
  7. from ..utils.helper import Helper
  8. import time
  9. from ..items import Odds
  10. class LqSportsSpider(scrapy.Spider):
  11. name = 'lq_sports'
  12. allowed_domains = ['m.hgg070.com/']
  13. start_urls = ['http://m.hgg070.com//']
  14. remath = re.compile("篮球")
  15. custom_settings={
  16. "ITEM_PIPELINES": {
  17. "hgg070_spider.pipelines.lq_sports.LqSportsPipeline": 200,
  18. },
  19. }
  20. def start_requests(self):
  21. #今日,早盘
  22. h_types=[('FT'),('FU')]
  23. headers = {
  24. 'Accept': '*/*',
  25. 'Accept-Encoding': 'gzip, deflate',
  26. 'Accept-Language': 'zh-CN,zh;q=0.9',
  27. 'Connection': 'keep-alive',
  28. 'Content-Length': '130',
  29. 'Content-type': 'application/x-www-form-urlencoded',
  30. 'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
  31. 'Host': 'm.hgg070.com',
  32. 'Origin': 'http://m.hgg070.com',
  33. 'Referer': 'http://m.hgg070.com/',
  34. 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
  35. }
  36. url = "http://m.hgg070.com/app/member/get_league_list.php"
  37. for item in h_types:
  38. showtype = item
  39. data={
  40. 'uid': 'ab179dc88196ff82fbb13c259575332f01fbad2c52b465f5def15a4876c10410',
  41. 'langx': 'zh-cn',
  42. 'ltype': '3',
  43. 'gtype': 'BK',
  44. 'showtype': showtype,
  45. 'sorttype': '',
  46. 'date': '',
  47. 'isP': ''
  48. }
  49. yield scrapy.FormRequest(url=url,formdata=data,callback=self.parse,headers=headers,
  50. meta={"data":data}, dont_filter=True)
  51. def parse(self, response):
  52. #获取id并判断抓取的球型
  53. data=response.meta["data"]
  54. fromdata=copy.deepcopy(data)
  55. league=response.xpath('//league')
  56. url="http://m.hgg070.com/app/member/get_game_list.php"
  57. for le in league:
  58. name=le.xpath('./league_name/text()').extract_first()
  59. if len(self.remath.findall(name))>0:
  60. lid = le.xpath('./league_id/text()').extract_first()
  61. # 抓取今日
  62. if data["showtype"]=="FT":
  63. data['lid'],data['sorttype'],data['date']=lid,'league',''
  64. # 抓取早盘
  65. elif data["showtype"]=="FU":
  66. data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
  67. yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
  68. def detailball(self,response):
  69. data=response.meta["data"]
  70. url="http://m.hgg070.com/app/member/get_game_more.php"
  71. #获取联赛id gid
  72. game=response.xpath("//game")
  73. for g in game:
  74. gid=g.xpath("./gid/text()").extract_first()
  75. more_count = g.xpath("./more_count/text()").extract_first()
  76. data["gid"]=gid
  77. yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,meta={"more_count":more_count,"isP":data["isP"]},dont_filter=True)
  78. def getItem(self,response):
  79. more_count = response.meta["more_count"]
  80. isP = response.meta["isP"]
  81. game_lists = []
  82. data=response.xpath("//game")
  83. if data:
  84. uid_list = []
  85. for game in data:
  86. game_odds = {}
  87. gopen = game.xpath('//game/gopen/text()').extract_first()
  88. if gopen == 'Y':
  89. game = lxml.etree.fromstring(game.extract())
  90. for i in game.getchildren():
  91. if i.text == None:
  92. game_odds[i.tag] = ""
  93. else:
  94. game_odds[i.tag] = i.text.replace(' ', '')
  95. game_lists.append(game_odds)
  96. else:
  97. print('gopen == N, 详细赔率盘口未开启')
  98. if game_lists:
  99. for gl in game_lists:
  100. cpath=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  101. with open(cpath+"/conf/hgg070.json",encoding='utf8') as hg:
  102. hgg=json.load(hg)['bk']
  103. odd_list = []
  104. datetime = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
  105. team_h = gl['team_h']
  106. team_c = gl['team_c']
  107. league_id = gl['gidm']
  108. match_id = gl.get('gid', '')
  109. match_uid = Helper.genearte_uuid(team_h + team_c + datetime)
  110. for x in hgg:
  111. try:
  112. enabled = gl[x['prodds']]
  113. if enabled == 'Y':
  114. x['enabled'] = 1
  115. else:
  116. x['enabled'] = 0
  117. except:
  118. enabled = ''
  119. x['enabled'] = 0
  120. items = x['items']
  121. new_items = []
  122. for y in items:
  123. try:
  124. y['oddsv'] = gl[y['rodds']]
  125. except:
  126. y['oddsv'] = 0
  127. try:
  128. y['ratio'] = gl[y['ratio_name']]
  129. except:
  130. y['ratio'] = ""
  131. y['data'] = gl
  132. uid = str(x['plodds']) + str(y['lodds']) + str(y['rodds']) + str(y['ratio']) + str(
  133. y['ratio_name']) + str(y['oddsv']) + str(match_id) + str(league_id)
  134. sl = str(y['lodds']) + str(y['rodds']) + str(y['ratio']) + str(y['ratio_name']) + str(
  135. x['plodds']) + str(x['prodds'])
  136. odds_only = Helper.genearte_MD5(uid)
  137. sole = Helper.genearte_MD5(sl)
  138. y['uid'] = odds_only
  139. y['sole'] = sole
  140. if enabled == 'Y':
  141. uid_list.append(odds_only)
  142. new_items.append(y)
  143. n_i = copy.deepcopy(x)
  144. n_i['items'] = new_items
  145. odd_list.append(n_i)
  146. item = Odds()
  147. item['match_id'] = match_id
  148. item['uuid'] = uid_list
  149. item['source'] = "hg0088"
  150. item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  151. item['content'] = odd_list
  152. item['gidm'] = league_id
  153. item['more_count'] = more_count
  154. item['league'] = gl["league"]
  155. item['match_identity'] = match_uid
  156. item['datetime'] = datetime
  157. item['team_h'] = team_h
  158. item['team_c'] = team_c
  159. print('最后#######################################################',item)
  160. yield item