| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- # -*- coding: utf-8 -*-
- import logging
- import lxml
- import scrapy
- from items import ZuqiuItem
class ZuqiuSpider(scrapy.Spider):
    """Crawl football ("zuqiu") leagues, matches and odds from m.hgg070.com.

    The crawl is a chain of three form POSTs:

    1. ``get_league_list.php`` -> :meth:`parse` (league ids)
    2. ``get_game_list.php``   -> :meth:`parse_match` (game ids per league)
    3. ``get_game_more.php``   -> :meth:`parse_odds` (one ``ZuqiuItem`` per game)

    The ``showtype`` / ``isP`` pair chosen in :meth:`start_requests` is carried
    through ``response.meta`` so each later step queries the same market.
    """

    name = 'zuqiu'
    allowed_domains = ['m.hgg070.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }

    # Base headers sent with every request. This dict is treated as read-only:
    # each request gets its own copy with the Content-Length filled in.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Length': '130',
        'Content-type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.2.1009358217.1572056223; _gid=GA1.2.97506800.1572056223; _gat=1',
        'Host': 'm.hgg070.com',
        'Origin': 'http://m.hgg070.com',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://m.hgg070.com/',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }

    # ``uid`` form field sent with every POST.
    uid = '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb'

    # (showtype, isP) -> (date, Content-Length) for get_game_list.php.
    # Any other combination falls back to _GAME_LIST_DEFAULT.
    # NOTE(review): the Content-Length values are hard-coded; confirm whether
    # the header needs to be sent explicitly at all, since the lid/gid fields
    # vary between requests.
    _GAME_LIST_PARAMS = {
        ('FT', ''): ('', '147'),
        ('FU', 'P'): ('all', '151'),
        ('FU', ''): ('all', '150'),
    }
    _GAME_LIST_DEFAULT = ('all', '151')

    # (showtype, isP) -> (date, Content-Length) for get_game_more.php.
    _GAME_MORE_PARAMS = {
        ('FT', ''): ('', '132'),
        ('FU', 'P'): ('all', '136'),
        ('FU', ''): ('', '132'),
    }
    _GAME_MORE_DEFAULT = ('all', '136')

    def _form_request(self, url, formdata, callback, content_length, showtype, isp):
        """Build a non-deduplicated form POST that carries the market in ``meta``.

        The headers are copied per request so the class-level ``headers`` dict
        is never mutated.
        """
        request_headers = dict(self.headers)
        request_headers['Content-Length'] = content_length
        return scrapy.FormRequest(
            url=url,
            formdata=formdata,
            callback=callback,
            headers=request_headers,
            meta={'showtype': showtype, 'isp': isp},
            dont_filter=True,
        )

    def _log_bad_request(self, origin, response):
        """Log a warning when a callback receives an HTTP 400 response.

        NOTE(review): Scrapy's HttpErrorMiddleware drops non-2xx responses by
        default, so this only fires if 400 is allowed through elsewhere.
        """
        if response.status == 400:
            self.logger.warning('%s: HTTP %s from %s', origin, response.status, response.url)

    def start_requests(self):
        """Yield one league-list request per (showtype, isP) market."""
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        # Each entry is (showtype, isP, Content-Length).
        # NOTE(review): the original list repeated ('FU', 'P', '131'); the
        # duplicate is dropped here. It may have been a typo for another
        # market - confirm.
        h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', '', '130')]
        for show_type, isp, length in h_types:
            from_data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': show_type,
                'sorttype': '',
                'date': '',
                'isP': isp,
            }
            yield self._form_request(url, from_data, self.parse, length, show_type, isp)

    def parse(self, response):
        """Read league ids from the league list and request each league's games."""
        self._log_bad_request('parse', response)
        leagues = response.xpath('//serverresponse/game/league')
        if not leagues:
            self.logger.warning('未获取到联赛id')
            return

        url = 'http://m.hgg070.com/app/member/get_game_list.php'
        showtype = response.meta['showtype']
        isp = response.meta['isp']
        date, length = self._GAME_LIST_PARAMS.get((showtype, isp), self._GAME_LIST_DEFAULT)

        for league in leagues:
            lid = league.xpath('.//league_id/text()').extract_first()
            if lid is None:
                # A league without an id cannot be queried; skip it instead of
                # passing None into the form data.
                self.logger.warning('parse: league without league_id in %s', response.url)
                continue
            from_data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': showtype,
                'lid': lid,
                'sorttype': 'league',
                'date': date,
                'isP': isp,
            }
            yield self._form_request(url, from_data, self.parse_match, length, showtype, isp)

    def parse_match(self, response):
        """Read game ids from a league's game list and request each game's odds."""
        self._log_bad_request('parse_match', response)
        url = 'http://m.hgg070.com/app/member/get_game_more.php'
        showtype = response.meta['showtype']
        isp = response.meta['isp']
        date, length = self._GAME_MORE_PARAMS.get((showtype, isp), self._GAME_MORE_DEFAULT)

        for gid in response.xpath('//serverresponse/game/gid/text()').extract():
            from_data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'FT',
                'showtype': showtype,
                'date': date,
                'isP': isp,
                'gid': gid,
            }
            yield self._form_request(url, from_data, self.parse_odds, length, showtype, isp)

    def parse_odds(self, response):
        """Flatten the first ``<game>`` element into a dict and emit it as an item.

        When ``gopen`` is ``'Y'`` every direct child of the game element becomes
        a ``tag -> text`` entry (missing text is stored as ``""``). Otherwise
        the item is emitted with an empty dict.
        """
        # Imported here because the module only does ``import lxml``, which by
        # itself does not load the ``etree`` submodule.
        from lxml import etree

        self._log_bad_request('parse_odds', response)
        games = response.xpath('//serverresponse/game')
        if not games:
            # Nothing to index: avoid the IndexError a bare ``[0]`` would raise.
            self.logger.warning('parse_odds: no <game> element in %s', response.url)
            return

        game = games[0]
        game_odds = {}
        # Relative path: an absolute '//game/...' would search the whole
        # document instead of the selected game.
        gopen = game.xpath('./gopen/text()').extract_first()
        if gopen == 'Y':
            game_element = etree.fromstring(game.extract())
            for child in game_element:
                game_odds[child.tag] = "" if child.text is None else child.text
        else:
            self.logger.info('gopen == "N", 详细赔率盘口未开启')

        item = ZuqiuItem()
        item['all'] = game_odds
        yield item
|