# -*- coding: utf-8 -*-
import copy
import datetime
import json
import time

import lxml.etree
import scrapy

import pycomm
from collectSports.biz import getMongo
from collectSports.items import Odds


class SportsSpider(scrapy.Spider):
    """Collect football ("FT") odds from hg0088 for every competition stored
    in the ``zq_competition`` Mongo collection and yield one :class:`Odds`
    item per competition.
    """

    name = 'sports'
    allowed_domains = ['hg0088.com']
    # NOTE(review): these keys look like HTTP *request headers*, but Scrapy's
    # ``custom_settings`` expects Scrapy *setting* names — as written only
    # USER_AGENT is a real setting and the rest are ignored. They probably
    # belong in DEFAULT_REQUEST_HEADERS; confirm intent before moving them.
    custom_settings = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": "OddType@21627573=H; _ga=GA1.4.773413111.1560825258; _gid=GA1.4.1960743904.1560825258; protocolstr=https; gamePoint_21627573=2019-06-18%2A2%2A",
        "Host": "205.201.4.177",
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
    }
    # Endpoint queried once per competition with a POST form.
    start_url = 'https://205.201.4.177/app/member/get_game_allbets.php'

    def start_requests(self):
        """Yield one POST request per competition found in Mongo.

        Each request carries ``match_id`` and ``uuid`` in ``meta`` so that
        :meth:`parse` can attach them to the emitted item.
        """
        url = self.start_url
        mongo = getMongo()
        zq_competitions = mongo.changeSet('zq_competition').find()
        for zq_competition in zq_competitions:
            match_id = str(zq_competition['match_id'])
            current_time = datetime.datetime.now()
            # NOTE(review): the empty filter ``{}`` (with upsert) touches only
            # the *first* matching document on every loop iteration — this
            # looks like it was meant to target the current competition
            # (e.g. filter on match_id). Confirm before changing, since the
            # collection contents depend on it.
            mongo.changeSet('zq_competition').update(
                {}, {'$set': {'current_time': current_time}}, upsert=True)
            uuid = zq_competition['uuid']
            form_data = {
                "uid": "iobou83m21627573l357852",
                "langx": "zh-cn",
                "gtype": "FT",
                "showtype": "FT",
                "gid": match_id,
                "ltype": "4",
                "date": pycomm.gmdate(),
            }
            yield scrapy.FormRequest(
                url,
                formdata=form_data,
                callback=self.parse,
                dont_filter=True,
                meta={'match_id': match_id, 'uuid': uuid},
            )

    def parse(self, response):
        """Parse the odds XML response and yield a populated :class:`Odds` item.

        The response is expected to be ``<serverresponse><game>…`` XML; each
        ``<game>`` child element is flattened into a ``{tag: text}`` dict.
        Only the first game is used to fill the odds template loaded from
        ``hg0088.json``.
        """
        match_id = response.meta['match_id']
        uuid = response.meta['uuid']

        # Flatten every <game> element into a plain {tag: text} dict.
        game_list = []
        for game in response.xpath('/serverresponse/game'):
            node = lxml.etree.fromstring(game.extract())
            game_list.append({child.tag: child.text for child in node.getchildren()})
        self.logger.debug("parsed games for match %s: %s", match_id, game_list)

        # Odds template describing which response fields map to which odds.
        with open('../collectSports/conf/hg0088.json', 'r', encoding='utf8') as hg:
            hg0088 = json.load(hg)['root']

        # Empty responses fall through to the defaults below instead of
        # relying on a bare except to absorb the IndexError.
        first_game = game_list[0] if game_list else {}

        odd_list = []
        for x in hg0088:
            try:
                x['enabled'] = first_game[x['prodds']]
            except KeyError:
                # Field absent in template or response — leave as configured.
                pass
            new_items = []
            for entry in x['items']:  # renamed: avoid shadowing the Odds item
                try:
                    entry['oddsv'] = first_game[entry['rodds']]
                except KeyError:
                    entry['oddsv'] = 0
                try:
                    entry['ratio'] = first_game[entry['ratio_name']]
                except KeyError:
                    entry['ratio'] = ""
                new_items.append(entry)
            n_i = copy.deepcopy(x)
            n_i['items'] = new_items
            odd_list.append(n_i)

        item = Odds()
        item['match_id'] = match_id
        item['uuid'] = uuid
        item['source'] = "hg0088"
        # 'updata' is the field name defined on the Odds item (sic).
        item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        item['content'] = odd_list
        yield item