| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- # -*- coding: utf-8 -*-
- import logging
- # import lxml
- import scrapy
- import xmltodict
- from ..items import ZuqiuItem
class ZuqiuSpider(scrapy.Spider):
    """Spider ``roll_zuqiu``: collect per-game odds from m.hgg070.com.

    The crawl is a chain of form POSTs, each step feeding the next:

    1. ``start_requests`` posts to ``league_list_url``.
    2. ``parse`` posts to ``game_list_url`` once per league id found.
    3. ``parse_match`` posts to ``game_more_url`` once per game id found.
    4. ``parse_odds`` yields a ``ZuqiuItem`` for every game whose ``gopen``
       flag is ``'Y'``.

    Every request carries the same base form fields (``common_form``) plus
    the session ``uid``.
    NOTE(review): ``gtype='FT'`` / ``showtype='RB'`` presumably select in-play
    football markets -- confirm against the site's API.
    """

    name = 'roll_zuqiu'
    allowed_domains = ['m.hgg070.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.roll_zuqiu.RollPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }

    # Endpoints used by the three crawl steps.
    league_list_url = 'http://m.hgg070.com/app/member/get_league_list.php'
    game_list_url = 'http://m.hgg070.com/app/member/get_game_list.php'
    game_more_url = 'http://m.hgg070.com/app/member/get_game_more.php'

    # Session token sent with every request.  Kept as a class attribute so it
    # can be replaced without editing the code (Scrapy copies spider
    # arguments onto the instance, e.g. ``scrapy crawl roll_zuqiu -a uid=...``).
    uid = '9965a18b03dc6aacf12290bd5b8267fd7e38ec76eadf899b6108e82da5974cdd'

    # Form fields shared by all three endpoints.
    common_form = {
        'langx': 'zh-cn',
        'ltype': '3',
        'gtype': 'FT',
        'showtype': 'RB',
    }

    # No 'Content-Length' here on purpose: the body length depends on the
    # length of the league/game id being posted, so a fixed value is only
    # right by coincidence.  The HTTP client derives it from the real body.
    # This dict is never mutated by the callbacks.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }

    def _form_request(self, url, callback, meta=None, **fields):
        """Build a form POST to *url* with the shared fields plus *fields*.

        Args:
            url: Endpoint to post to.
            callback: Spider method that handles the response.
            meta: Optional request meta dict forwarded to the callback.
            **fields: Endpoint-specific form fields, appended after the
                shared ones.

        Returns:
            A ``scrapy.FormRequest`` with duplicate filtering disabled, since
            the same URL is posted to repeatedly with different form data.
        """
        formdata = {'uid': self.uid}
        formdata.update(self.common_form)
        formdata.update(fields)
        return scrapy.FormRequest(
            url=url,
            formdata=formdata,
            callback=callback,
            headers=self.headers,
            meta=meta,
            dont_filter=True,
        )

    def start_requests(self):
        """Start the crawl by requesting the league list."""
        yield self._form_request(
            self.league_list_url,
            self.parse,
            sorttype='',
            date='',
            isP='',
        )

    def parse(self, response):
        """Request the game list of every league in the league-list response.

        Logs a warning and yields nothing when the response has no
        ``serverresponse/game/league`` nodes.
        """
        leagues = response.xpath('//serverresponse/game/league')
        if not leagues:
            self.logger.warning('未获取到联赛id')
            return
        for league in leagues:
            lid = league.xpath('.//league_id/text()').extract_first()
            if not lid:
                # A league node without an id cannot be queried; a None
                # value must not reach the form encoder.
                continue
            yield self._form_request(
                self.game_list_url,
                self.parse_match,
                lid=lid,
                sorttype='',
                date='',
                isP='',
            )

    def parse_match(self, response):
        """Request detailed odds for every game in a game-list response.

        ``gid`` and ``more_count`` are read from the same ``<game>`` node so
        they always belong together.  The ``more_count`` text travels to
        ``parse_odds`` as ``meta['tag']`` and is ``None`` when the game has
        no such text.
        """
        for game in response.xpath('//serverresponse/game'):
            gid = game.xpath('./gid/text()').extract_first()
            if not gid:
                continue
            tag = game.xpath('./more_count/text()').extract_first()
            yield self._form_request(
                self.game_more_url,
                self.parse_odds,
                meta={'tag': tag},
                date='',
                isP='',
                gid=gid,
            )

    def parse_odds(self, response):
        """Yield a ``ZuqiuItem`` for the game in *response* if it is open.

        The item's ``data`` is the parsed ``<game>`` mapping and ``tag`` is
        the value passed along in ``response.meta['tag']``.  Responses with
        no usable ``<game>`` element, or whose ``gopen`` is not ``'Y'``, are
        logged and skipped.
        """
        parsed = xmltodict.parse(response.text)
        games = (parsed.get('serverresponse') or {}).get('game')
        # xmltodict returns a list when <game> repeats and a single mapping
        # when it occurs once; only the first game is used either way.
        game_odds = games[0] if isinstance(games, list) else games
        if not isinstance(game_odds, dict):
            self.logger.warning('no <game> element in odds response from %s', response.url)
            return
        if game_odds.get('gopen') != 'Y':
            self.logger.info('gopen == "N", 详细赔率盘口未开启')
            return
        item = ZuqiuItem()
        item['data'] = game_odds
        item['tag'] = response.meta['tag']
        yield item
|