sports.py

# -*- coding: utf-8 -*-
import copy
import time
import scrapy
import lxml.etree
import pycomm
import json
from collectSports.biz import getMongo
from collectSports.items import Odds
from collectSports.spiders.setdata import hash_func
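
# Overview: for every match id stored in the MongoDB collection
# 'zq_competition', POST to the hg0088 odds endpoint get_game_allbets.php,
# flatten the XML reply into a tag->text dict, map it onto the market
# definitions in conf/hg0088.json, and yield one Odds item per match for
# SportsPipeline to persist.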
class SportsSpider(scrapy.Spider):
    name = 'sports'
    allowed_domains = ['hg0088.com']
    # start_urls = ['http://hg0088.com/']

    # Request headers live under DEFAULT_REQUEST_HEADERS so Scrapy attaches
    # them to every request; USER_AGENT and ITEM_PIPELINES are ordinary
    # per-spider settings. The Cookie value appears to be captured from a
    # logged-in browser session.
    custom_settings = {
        "DEFAULT_REQUEST_HEADERS": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            # "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
            "Cookie": "OddType@21627573=H; _ga=GA1.4.773413111.1560825258; _gid=GA1.4.1960743904.1560825258; protocolstr=https; gamePoint_21627573=2019-06-18%2A2%2A0; _gat_UA-75448111-1=1",
            "Host": "205.201.4.177",
            "Origin": "https://205.201.4.177",
        },
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sports.SportsPipeline": 200,
        },
    }

    # The odds feed is queried by IP directly; one POST per match id.
    start_url = 'https://205.201.4.177/app/member/get_game_allbets.php'

    def start_requests(self):
        """Issue one odds request per football match stored in MongoDB."""
        url = self.start_url
        mongo = getMongo()
        zq_competitions = mongo.changeSet('zq_competition').find()
        for zq_competition in zq_competitions:
            match_id = str(zq_competition['match_id'])
            # uuid = zq_competition['uuid']
            # The uid field appears to be an account/session token; FT marks
            # football, gid selects the match, and pycomm.gmdate() supplies
            # the date string the endpoint expects.
            form_data = {
                "uid": "jjdpzpc07m21627573l376834",
                "langx": "zh-cn",
                "gtype": "FT",
                "showtype": "FT",
                "gid": match_id,
                "ltype": "4",
                "date": pycomm.gmdate()
            }
            # dont_filter=True: all requests share one URL, so every match
            # must bypass the duplicate filter; the match id rides in meta.
            request = scrapy.FormRequest(url, formdata=form_data, callback=self.parse,
                                         dont_filter=True, meta={'match_id': match_id})
            yield request
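
    # The endpoint replies with XML along the lines of
    #   <serverresponse><game><gidm>...</gidm>...</game></serverresponse>
    # where each child of <game> is one odds/handicap/flag field; the exact
    # tag names are looked up from conf/hg0088.json, not hard-coded here.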
    def parse(self, response):
        """Flatten each <game> element and map it onto the market config."""
        game_list = []
        games = response.xpath('/serverresponse/game')
        match_id = response.meta['match_id']
        uid_list = []
        for game in games:
            # Re-parse the extracted element and flatten it to {tag: text}.
            game_odds = {}
            game = lxml.etree.fromstring(game.extract())
            for i in game.getchildren():
                game_odds[i.tag] = i.text
            game_list.append(game_odds)
        # Market definitions; the path is relative to the working directory
        # the crawl is started from.
        with open('../collectSports/conf/hg0088.json', 'r', encoding='utf8') as hg:
            hg0088 = json.load(hg)['root']
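
        # Shape of one hg0088.json entry, inferred from how it is used below
        # (any further fields are unknown):
        #   {"prodds": "<enabled-flag tag>",
        #    "items": [{"lodds": ..., "rodds": "<odds tag>",
        #               "ratio_name": "<handicap tag>"}, ...]}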
        odd_list = []
        for x in hg0088:
            # A market is enabled only when its 'prodds' flag in the feed is
            # 'Y'; a missing key or an empty feed disables it.
            try:
                x['enabled'] = 1 if game_list[0][x['prodds']] == 'Y' else 0
            except (IndexError, KeyError):
                x['enabled'] = 0
            items = x['items']
            new_items = []
            for y in items:
                # Odds value and handicap ratio for this selection, with
                # neutral defaults when the feed omits the field.
                try:
                    y['oddsv'] = game_list[0][y['rodds']]
                except (IndexError, KeyError):
                    y['oddsv'] = 0
                try:
                    y['ratio'] = game_list[0][y['ratio_name']]
                except (IndexError, KeyError):
                    y['ratio'] = ""
                # Fingerprint of the selection (market tags plus current value).
                uid = hash_func(y['lodds'], y['rodds'], y['ratio'], y['ratio_name'], y['oddsv'])
                y['uid'] = uid
                uid_list.append(uid)
                new_items.append(y)
            n_i = copy.deepcopy(x)
            n_i['items'] = new_items
            odd_list.append(n_i)
        # Carry the feed's 'gidm' field through; empty when no <game> was parsed.
        gidm = game_list[0].get('gidm', '') if game_list else ''
        # print(response.text)
        item = Odds()
        item['match_id'] = match_id
        item['uuid'] = uid_list
        item['source'] = "hg0088"
        item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        item['content'] = odd_list
        item['gidm'] = gidm
        yield item
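
# Typical invocation (spider name registered above): scrapy crawl sports
# Note: the relative conf/hg0088.json path assumes a matching working directory.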