# -*- coding: utf-8 -*-
import copy
import datetime
import json

import scrapy
from scrapy.http import Request

from ..items import Guanjun


class LqguanjunSpider(scrapy.Spider):
    """Fetch the ``getodds`` JSON feed from hg3535z.com and yield ``Guanjun`` items.

    One request is issued per (sid, pn) pair; every option row found in the
    decoded response is emitted as a separate item.
    """

    name = 'guanjun'
    allowed_domains = ['hg3535z.com']
    # Evaluated once at import time; only used to date-stamp the log file name.
    to_day = datetime.datetime.now()
    custom_settings = {
        "ITEM_PIPELINES": {
            'hg3535.pipelines.Guanjunpipeline': 300,
        },
        'LOG_LEVEL': 'DEBUG',
        'LOG_FILE': "../hg3535/log/guanjun_{}_{}_{}.log".format(
            to_day.year, to_day.month, to_day.day),
    }

    # Example URLs (the ``ubt`` parameter has been seen as both ``am`` and ``or``):
    #   https://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=am&pn=0&sb=2&dc=null&pid=0
    #   https://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=or&pn=0&sb=2&dc=null&pid=0
    _URL_TEMPLATE = ('https://hg3535z.com/odds2/d/getodds'
                     '?sid={sid}&pt=1&ubt=or&pn={pn}&sb=2&dc=null&pid=0')

    def start_requests(self):
        """Yield one request for every sid in 1..4 and every pn in 0..3.

        ``pt`` is fixed to 1 both in the URL and in the request meta.
        ``dont_filter=True`` bypasses Scrapy's duplicate-request filter.
        """
        for sid in range(1, 5):
            for page in range(0, 4):
                url = self._URL_TEMPLATE.format(sid=sid, pn=page)
                yield Request(url=url, callback=self.parse,
                              meta={'pt': 1, 'sid': sid}, dont_filter=True)

    def parse(self, response):
        """Decode the JSON body and yield a ``Guanjun`` item per option row.

        Nothing is yielded when the body is not valid JSON or when
        ``['n-ot']['egs']`` is missing or empty.  Missing ``pt`` meta falls
        back to ``0`` and a missing ``['n-ot']['s']['n']`` falls back to ``""``.
        Errors inside an individual record are not caught and propagate to
        Scrapy.
        """
        try:
            datas = json.loads(response.text)
        except (AttributeError, TypeError, ValueError):
            # AttributeError: non-text response; ValueError: malformed JSON.
            self.logger.warning("Could not decode JSON from %s", response.url)
            return

        try:
            results = datas['n-ot']['egs']
        except (KeyError, TypeError, IndexError):
            results = None
        if not results:
            return

        try:
            pt = copy.copy(response.meta['pt'])
        except (KeyError, AttributeError):
            pt = 0

        try:
            ball = datas['n-ot']['s']['n']
        except (KeyError, TypeError, IndexError):
            ball = ""

        for result in results:
            # League id
            league_id = result['c']['k']
            # League name
            league_name = result['c']['n']
            for new_result in result['es']:
                data_game = new_result['i'][4]
                time_game = new_result['i'][5]
                new_league_name = new_result['egn'].replace('\t', '')
                for market in new_result['n-o']:
                    new_champion = market['mn']
                    for option in market['o']:
                        item = Guanjun()
                        item['league_name'] = league_name
                        item['tema_home'] = option[0]
                        item['league_id'] = league_id
                        item['game_id'] = str(new_result['i'][16])
                        item['data_game'] = data_game
                        item['time_game'] = time_game
                        item['new_league_name'] = new_league_name
                        # NOTE(review): presumably turns a decimal odd into
                        # the net payout (odd - 1) -- confirm with pipeline.
                        item['champion_team'] = float(option[2]) - 1
                        item['new_champion'] = new_champion
                        item['pt'] = pt
                        item['ball'] = ball
                        yield item