# -*- coding: utf-8 -*-
"""Scrapy spider that collects per-match odds from the hg0088 member API."""
import copy
import json
import time

import lxml.etree
import pycomm
import scrapy

from collectSports.biz import getMongo
from collectSports.items import Odds
from collectSports.spiders.setdata import hash_func


class SportsSpider(scrapy.Spider):
    """Fetch all bets for every stored competition and emit one ``Odds`` item each.

    For each document in the ``zq_competition`` collection a form POST is sent
    to ``start_url``; the XML answer is merged with the odds template stored in
    ``odds_config_path`` and yielded as an ``Odds`` item.
    """

    name = 'sports'
    allowed_domains = ['hg0088.com']

    # NOTE(review): the header-like keys below ("Accept", "Cookie", "Host", ...)
    # are not Scrapy setting names, so they are presumably not sent as request
    # headers; they may have been intended for DEFAULT_REQUEST_HEADERS.
    # Left untouched to keep runtime behaviour identical -- confirm intent.
    # NOTE(review): the Cookie value (and the "uid" form field used in
    # start_requests) look like hard-coded session credentials; consider
    # moving them to configuration.
    custom_settings = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": "OddType@21627573=H; _ga=GA1.4.773413111.1560825258; _gid=GA1.4.1960743904.1560825258; protocolstr=https; gamePoint_21627573=2019-06-18%2A2%2A0; _gat_UA-75448111-1=1",
        "Host": "205.201.4.177",
        "Origin": "https://205.201.4.177",
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sports.SportsPipeline": 200,
        }
    }

    start_url = 'https://205.201.4.177/app/member/get_game_allbets.php'

    # Path of the JSON odds template. It is relative, so it is resolved
    # against the process working directory, exactly as before.
    odds_config_path = '../collectSports/conf/hg0088.json'

    def start_requests(self):
        """Yield one ``FormRequest`` per competition found in MongoDB.

        The competition's ``match_id`` is sent as the ``gid`` form field and is
        also passed to ``parse`` through ``meta['match_id']``.
        """
        url = self.start_url
        mongo = getMongo()
        zq_competitions = mongo.changeSet('zq_competition').find()
        for zq_competition in zq_competitions:
            match_id = str(zq_competition['match_id'])
            form_data = {
                "uid": "jjdpzpc07m21627573l376834",
                "langx": "zh-cn",
                "gtype": "FT",
                "showtype": "FT",
                "gid": match_id,
                "ltype": "4",
                "date": pycomm.gmdate()
            }
            yield scrapy.FormRequest(
                url,
                formdata=form_data,
                callback=self.parse,
                dont_filter=True,
                meta={'match_id': match_id},
            )

    @staticmethod
    def _game_to_dict(game_xml):
        """Return ``{child tag: child text}`` for one serialized ``<game>`` element."""
        node = lxml.etree.fromstring(game_xml)
        # Iterating the element walks its direct children; this replaces the
        # deprecated ``getchildren()`` call.
        return {child.tag: child.text for child in node}

    @staticmethod
    def _lookup(game_odds, entry, field, default):
        """Return ``game_odds[entry[field]]`` or ``default`` when it is unavailable.

        Only lookup failures are absorbed: a missing ``field`` in ``entry``, a
        missing key in ``game_odds`` (``KeyError``) or an unhashable key
        (``TypeError``). Anything else propagates.
        """
        try:
            return game_odds[entry[field]]
        except (KeyError, TypeError):
            return default

    def _load_odds_template(self):
        """Load and return the ``root`` list of the JSON odds template."""
        with open(self.odds_config_path, 'r', encoding='utf8') as hg:
            return json.load(hg)['root']

    def parse(self, response):
        """Merge the XML answer with the odds template and yield one ``Odds`` item.

        Only the first ``<game>`` element of the response is used to fill in
        values. When the response contains no game, every group is marked
        disabled, ``oddsv`` falls back to ``0``, ``ratio`` to ``""`` and
        ``gidm`` to ``''``.
        """
        match_id = response.meta['match_id']
        game_list = [
            self._game_to_dict(game.extract())
            for game in response.xpath('/serverresponse/game')
        ]
        # An empty mapping makes every lookup below fall back to its default
        # when the response carries no game at all.
        first_game = game_list[0] if game_list else {}

        uid_list = []
        odd_list = []
        for group in self._load_odds_template():
            flag = self._lookup(first_game, group, 'prodds', None)
            group['enabled'] = 1 if flag == 'Y' else 0

            new_items = []
            for entry in group['items']:
                entry['oddsv'] = self._lookup(first_game, entry, 'rodds', 0)
                entry['ratio'] = self._lookup(first_game, entry, 'ratio_name', "")
                uid = hash_func(entry['lodds'], entry['rodds'], entry['ratio'],
                                entry['ratio_name'], entry['oddsv'])
                entry['uid'] = uid
                uid_list.append(uid)
                new_items.append(entry)

            group_copy = copy.deepcopy(group)
            # The emitted group carries the entries updated above (not the
            # deep-copied ones), matching the original behaviour.
            group_copy['items'] = new_items
            odd_list.append(group_copy)

        item = Odds()
        item['match_id'] = match_id
        item['uuid'] = uid_list
        item['source'] = "hg0088"
        item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        item['content'] = odd_list
        item['gidm'] = first_game.get('gidm', '')
        yield item