# -*- coding: utf-8 -*-
import copy
import json
import time

import lxml.etree
import scrapy

import pycomm
from collectSports.biz import getMongo
from collectSports.items import Odds
from collectSports.spiders.setdata import hash_func
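
# getMongo, Odds, and hash_func are project-local helpers not shown in this
# file. As a rough idea of the dedup key, hash_func plausibly digests the odd
# fields passed to it -- a minimal sketch under that assumption, not the
# project's actual code:
#
#     import hashlib
#
#     def hash_func(*fields):
#         """Stable hex digest over the odd fields, used as a dedup uid."""
#         joined = '|'.join(str(f) for f in fields)
#         return hashlib.md5(joined.encode('utf8')).hexdigest()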


class SportsSpider(scrapy.Spider):
    name = 'sports'
    allowed_domains = ['hg0088.com']
    # start_urls = ['http://hg0088.com/']
    # custom_settings only accepts Scrapy setting names; raw HTTP headers
    # must be nested under DEFAULT_REQUEST_HEADERS to take effect.
    custom_settings = {
        "DEFAULT_REQUEST_HEADERS": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            # "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
            "Cookie": "OddType@21627573=H; _ga=GA1.4.773413111.1560825258; _gid=GA1.4.1960743904.1560825258; protocolstr=https; gamePoint_21627573=2019-06-18%2A2%2A0; _gat_UA-75448111-1=1",
            "Host": "205.201.4.177",
            "Origin": "https://205.201.4.177",
        },
        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
        "ITEM_PIPELINES": {
            "collectSports.pipelines.sports.SportsPipeline": 200,
        },
    }

    start_url = 'https://205.201.4.177/app/member/get_game_allbets.php'

    def start_requests(self):
        """POST one odds query per match stored in the zq_competition collection."""
        url = self.start_url
        mongo = getMongo()
        zq_competitions = mongo.changeSet('zq_competition').find()
        for zq_competition in zq_competitions:
            match_id = str(zq_competition['match_id'])
            # uuid = zq_competition['uuid']
            form_data = {
                "uid": "jjdpzpc07m21627573l376834",
                "langx": "zh-cn",
                "gtype": "FT",
                "showtype": "FT",
                "gid": match_id,
                "ltype": "4",
                "date": pycomm.gmdate(),
            }
            # dont_filter=True: every request posts to the same endpoint, so
            # bypass Scrapy's duplicate filter to make sure none are dropped.
            yield scrapy.FormRequest(url, formdata=form_data, callback=self.parse,
                                     dont_filter=True, meta={'match_id': match_id})
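
    # For reference, a rendered POST body looks roughly like the following.
    # The gid value is an illustrative placeholder, and the "date" format
    # assumes pycomm.gmdate() returns a GMT date string -- both are
    # assumptions, not verified against the live site:
    #
    #     uid=jjdpzpc07m21627573l376834&langx=zh-cn&gtype=FT&showtype=FT
    #     &gid=2337169&ltype=4&date=2019-06-18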

    def parse(self, response):
        game_list = []
        games = response.xpath('/serverresponse/game')
        match_id = response.meta['match_id']
        uid_list = []
        # Flatten each <game> element of the XML response into a {tag: text} dict.
        for game in games:
            game_odds = {}
            game = lxml.etree.fromstring(game.extract())
            for i in game.getchildren():
                game_odds[i.tag] = i.text
            game_list.append(game_odds)
        # Market/odds field mapping for hg0088, reloaded on every response.
        with open('../collectSports/conf/hg0088.json', 'r', encoding='utf8') as hg:
            hg0088 = json.load(hg)['root']
        odd_list = []
        for x in hg0088:
            # IndexError covers an empty game_list, KeyError a missing field.
            try:
                # 'Y' marks a market as currently open for betting.
                x['enabled'] = 1 if game_list[0][x['prodds']] == 'Y' else 0
            except (IndexError, KeyError):
                x['enabled'] = 0
            new_items = []
            for y in x['items']:
                try:
                    y['oddsv'] = game_list[0][y['rodds']]
                except (IndexError, KeyError):
                    y['oddsv'] = 0
                try:
                    y['ratio'] = game_list[0][y['ratio_name']]
                except (IndexError, KeyError):
                    y['ratio'] = ""
                uid = hash_func(y['lodds'], y['rodds'], y['ratio'], y['ratio_name'], y['oddsv'])
                y['uid'] = uid
                uid_list.append(uid)
                new_items.append(y)
            n_i = copy.deepcopy(x)
            n_i['items'] = new_items
            odd_list.append(n_i)
        gidm = game_list[0].get('gidm', '') if game_list else ''
        # print(response.text)
        item = Odds()
        item['match_id'] = match_id
        item['uuid'] = uid_list
        item['source'] = "hg0088"
        item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        item['content'] = odd_list
        item['gidm'] = gidm
        yield item
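
# The parse() logic above implies the shapes sketched below. Element and key
# names other than <serverresponse>, <game>, <gidm>, 'prodds', 'rodds',
# 'lodds', 'ratio_name', and 'items' are illustrative assumptions.
#
# Server XML, one <game> per match:
#
#     <serverresponse>
#         <game>
#             <gidm>2337169</gidm>
#             <!-- per-market enabled flags ('Y'/'N') looked up via prodds,
#                  odd values via rodds, handicap ratios via ratio_name -->
#         </game>
#     </serverresponse>
#
# conf/hg0088.json, a list of market definitions under 'root':
#
#     {"root": [
#         {"prodds": "sw_RE", "items": [
#             {"lodds": "...", "rodds": "ior_REH", "ratio_name": "ratio_re"}
#         ]}
#     ]}
#
# Run the spider with:  scrapy crawl sports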