import datetime
import json
import logging

import redis
import scrapy

from ..items import Hgjieshu
# R_POST holds the Redis port number (name kept to match settings.py).
from ..settings import R_HOST, R_PASSWORD, R_POST, R_DB


class HgjieshuSpider(scrapy.Spider):
    """Spider that fetches final odds/scores for matches that have ended."""
    name = 'jieshu'
    to_day = datetime.datetime.now()
    allowed_domains = ['hg3535z.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            'hg3535.pipeline.jieshu.Jieshuqiupipeline': 300,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': "../hg3535/log/saiguo{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }
    # Shared Redis connection used to read and prune the pending match-id set.
    rls = redis.Redis(host=R_HOST, port=R_POST, db=R_DB, password=R_PASSWORD)

    def start_requests(self):
        # Each member of the set is a match id awaiting a result lookup.
        match_ids = self.rls.smembers("hg3535.gunqiu.ids")
        for match_id in match_ids:
            match_id = match_id.decode()
            url = ('https://odata.jiushan6688.com/odds6i/d/getamodds/zh-cn/'
                   'eid/{}/iip/true/ubt/am/isp/false'.format(match_id))
            yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)

    def parse(self, response):
        logger = logging.getLogger(__name__)
        try:
            data = json.loads(response.text)
            # data['i'][0] is treated as the in-play status flag: a falsy
            # value means the match has ended and its result is final.
            status = data['i'][0]
            if not status:
                ball = data['i'][31]
                match_id = data['i'][2]
                # The match is over; remove its id from the pending set so
                # it is not requested again.
                self.rls.srem('hg3535.gunqiu.ids', match_id)
                item = Hgjieshu()
                item['ball'] = ball
                item['match_id'] = match_id
                # item['status'] = status
                yield item
        except Exception as e:
            logger.warning(e)