| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- # -*- coding: utf-8 -*-
- import logging
- # import lxml
- import scrapy
- import xmltodict
- from ..items import ZuqiuItem
class ZuqiuSpider(scrapy.Spider):
    """Crawl league, game and odds data for ``gtype='TN'`` from m.hgg070.com.

    The crawl is a chain of form POSTs:

    1. ``start_requests`` queries ``get_league_list.php`` once per entry of
       ``_LEAGUE_LIST_PARAMS``.
    2. ``parse`` queries ``get_game_list.php`` once per league found.
    3. ``parse_match`` queries ``get_game_more.php`` once per game found.
    4. ``parse_odds`` yields one ``ZuqiuItem`` per game whose ``gopen`` is 'Y'.

    ``meta['index']`` is the position of the originating entry in
    ``_LEAGUE_LIST_PARAMS``; it is forwarded through every step and stored on
    the item so the pipeline can tell the variants apart.
    """

    name = 'wangqiu'
    allowed_domains = ['m.hgg070.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.wangqiu.WangqiuPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }
    # Base headers sent with every request. This dict is never mutated; the
    # Content-Length entry is overridden per request with the real body size.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Length': '130',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }

    # Value of the `uid` form field sent with every request.
    # NOTE(review): this looks like a session credential committed to source;
    # consider supplying it from configuration (it can be overridden per
    # instance, e.g. through a spider argument).
    uid = 'ca93c6189b00e4c4d974f45ccb1dfd5a1c7a7e06a0292d23025673b1ed195480'

    _LEAGUE_LIST_URL = 'http://m.hgg070.com/app/member/get_league_list.php'
    _GAME_LIST_URL = 'http://m.hgg070.com/app/member/get_game_list.php'
    _GAME_MORE_URL = 'http://m.hgg070.com/app/member/get_game_more.php'

    # (showtype, isP) per league-list request; list position == meta['index'].
    # NOTE(review): entries 1 and 3 are identical, while parse_match treats
    # index 1 as showtype 'FT'. Entry 1 may have been meant to be ('FT', 'P');
    # kept as in the original until confirmed.
    _LEAGUE_LIST_PARAMS = [('FT', ''), ('FU', 'P'), ('FU', ''), ('FU', 'P')]

    # index -> (date, showtype, isP) for get_game_list.php; other indexes use
    # the default.
    _GAME_LIST_PARAMS = {
        0: ('', 'FT', ''),
        2: ('all', 'FU', ''),
    }
    _GAME_LIST_DEFAULT = ('all', 'FU', 'P')

    # index -> (date, showtype, isP) for get_game_more.php; other indexes use
    # the default.
    _GAME_MORE_PARAMS = {
        0: ('', 'FT', ''),
        1: ('all', 'FT', 'P'),
        2: ('', 'FU', ''),
    }
    _GAME_MORE_DEFAULT = ('all', 'FU', 'P')

    _log = logging.getLogger(__name__)

    def _base_form(self, showtype):
        """Return the form fields shared by all three endpoints, in send order."""
        return {
            'uid': self.uid,
            'langx': 'zh-cn',
            'ltype': '3',
            'gtype': 'TN',
            'showtype': showtype,
        }

    def _post(self, url, formdata, callback, meta):
        """Build an unfiltered form POST whose Content-Length matches its body.

        The length is derived from the encoded body instead of a hard-coded
        constant, so it stays correct whatever the length of the league or
        game id placed in the form.
        """
        request = scrapy.FormRequest(
            url=url,
            formdata=formdata,
            callback=callback,
            headers=self.headers,
            meta=meta,
            dont_filter=True,
        )
        request.headers['Content-Length'] = str(len(request.body))
        return request

    def start_requests(self):
        """Yield one league-list request per entry of ``_LEAGUE_LIST_PARAMS``."""
        for index, (show_type, isp) in enumerate(self._LEAGUE_LIST_PARAMS):
            form = self._base_form(show_type)
            form['sorttype'] = ''
            form['date'] = ''
            form['isP'] = isp
            yield self._post(self._LEAGUE_LIST_URL, form, self.parse,
                             {'index': index})

    def parse(self, response):
        """Yield one game-list request per league in the league-list response.

        Leagues without a ``league_id`` text are skipped with a warning
        instead of being sent as an invalid form value.
        """
        leagues = response.xpath('//serverresponse/game/league')
        if not leagues:
            self._log.warning('未获取到联赛id')
            return

        index = response.meta['index']
        date, showtype, isp = self._GAME_LIST_PARAMS.get(
            index, self._GAME_LIST_DEFAULT)
        for league in leagues:
            lid = league.xpath('.//league_id/text()').extract_first()
            if lid is None:
                self._log.warning('league without league_id skipped (index=%s)',
                                  index)
                continue
            form = self._base_form(showtype)
            form['lid'] = lid
            form['sorttype'] = 'league'
            form['date'] = date
            form['isP'] = isp
            yield self._post(self._GAME_LIST_URL, form, self.parse_match,
                             {'index': index})

    def parse_match(self, response):
        """Yield one odds request per game in the game-list response.

        ``gid`` and ``more_count`` are read from the same ``<game>`` node so a
        game lacking ``more_count`` cannot shift the tags of later games; its
        tag is then ``None``. Games without a ``gid`` text are skipped.
        """
        index = response.meta['index']
        date, showtype, isp = self._GAME_MORE_PARAMS.get(
            index, self._GAME_MORE_DEFAULT)
        for game in response.xpath('//serverresponse/game'):
            gid = game.xpath('./gid/text()').extract_first()
            if gid is None:
                continue
            tag = game.xpath('./more_count/text()').extract_first()
            form = self._base_form(showtype)
            form['date'] = date
            form['isP'] = isp
            form['gid'] = gid
            yield self._post(self._GAME_MORE_URL, form, self.parse_odds,
                             {'index': index, 'tag': tag})

    def parse_odds(self, response):
        """Yield a ``ZuqiuItem`` for the game in the response if it is open.

        The item carries the parsed game mapping (``data``) together with the
        ``index`` and ``tag`` forwarded in ``response.meta``. Nothing is
        yielded when the response holds no game or its ``gopen`` is not 'Y'.
        """
        index = response.meta['index']
        tag = response.meta['tag']
        parsed = xmltodict.parse(response.text)

        root = parsed.get('serverresponse')
        games = root.get('game') if isinstance(root, dict) else None
        # xmltodict gives a list for repeated <game> elements and a single
        # mapping for one element; only the first game is used either way.
        game_odds = games[0] if isinstance(games, list) else games
        if not isinstance(game_odds, dict):
            self._log.warning('odds response without game data (index=%s)',
                              index)
            return

        if game_odds.get('gopen') != 'Y':
            self._log.info('gopen == "N", 详细赔率盘口未开启')
            return

        item = ZuqiuItem()
        item['data'] = game_odds
        item['index'] = index
        item['tag'] = tag
        yield item
|