| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- # -*- coding: utf-8 -*-
- import scrapy
- from ..items import LanqiuItem
- import copy
- import lxml.etree
- import re, os, json
- from ..utils.helper import Helper
- import time
- from ..items import LanqiuItem
- import xmltodict
class LqSportsSpider(scrapy.Spider):
    """Spider that collects basketball odds from m.hgg070.com.

    Request chain:
        start_requests -> get_league_list.php   (one request per show type)
        parse          -> get_game_list.php     (one request per basketball league)
        detailball     -> get_game_more.php     (one request per game)
        getItem        -> yields one LanqiuItem per open game
    """

    name = 'lanqiu'
    # Host name only. An entry with a trailing slash never matches a request
    # host, so it would drop every request that is not sent with
    # dont_filter=True.
    allowed_domains = ['m.hgg070.com']
    start_urls = ['http://m.hgg070.com//']
    # Only leagues whose name contains this word ("basketball") are followed.
    remath = re.compile("篮球")
    custom_settings = {
        "ITEM_PIPELINES": {
            "hgg070_spider.pipelines.lanqiu.LanqiuPipeline": 200,
        },
    }

    def start_requests(self):
        """Request the league list once for each show type.

        'FT' is the "today" market and 'FU' is the "early" market. The form
        that was posted is passed along in ``meta["data"]`` so later
        callbacks can reuse it.
        """
        show_types = ('FT', 'FU')
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            # Equals the length of the url-encoded form built below; it has
            # to be updated if a form field or value changes.
            'Content-Length': '130',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in show_types:
            data = {
                'uid': 'a8b9b2facd6b19ab7023a2b8686207d4ea98c3ab68e455abe8fe49a4861ff68f',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': ''
            }
            yield scrapy.FormRequest(url=url, formdata=data, callback=self.parse, headers=headers,
                                     meta={"data": data}, dont_filter=True)

    def parse(self, response):
        """Follow every league whose name matches ``remath``.

        The game-list request carries the league id, ``sorttype='league'``
        and a ``date`` of '' (today, 'FT') or 'all' (early market, 'FU').
        The ``meta["data"]`` handed to ``detailball`` is a copy of the form
        *without* those league fields, exactly as before.
        """
        base_form = response.meta["data"]
        date_by_showtype = {"FT": "", "FU": "all"}
        date = date_by_showtype.get(base_form["showtype"])
        if date is None:
            # Only 'FT' and 'FU' are ever requested by start_requests.
            return
        url = "http://m.hgg070.com/app/member/get_game_list.php"
        for league in response.xpath('//league'):
            league_name = league.xpath('./league_name/text()').extract_first()
            # A league without a name cannot be classified; skip it instead
            # of passing None to the regex.
            if not league_name or not self.remath.search(league_name):
                continue
            lid = league.xpath('./league_id/text()').extract_first()
            if not lid:
                # A None form value would make FormRequest raise.
                continue
            # Build a fresh form per league rather than mutating the dict
            # that arrived in response.meta.
            league_form = dict(base_form, lid=lid, sorttype='league', date=date)
            yield scrapy.FormRequest(url=url, formdata=league_form, callback=self.detailball,
                                     meta={"data": copy.deepcopy(base_form)},
                                     dont_filter=True)

    def detailball(self, response):
        """Request the full market ("more") page for each game in a league."""
        base_form = response.meta["data"]
        url = "http://m.hgg070.com/app/member/get_game_more.php"
        for game in response.xpath("//game"):
            gid = game.xpath("./gid/text()").extract_first()
            if not gid:
                # Nothing to look up without a game id.
                continue
            more_count = game.xpath("./more_count/text()").extract_first()
            # Per-game copy so the dict stored in meta is never mutated.
            game_form = dict(base_form, gid=gid)
            yield scrapy.FormRequest(url=url, formdata=game_form, callback=self.getItem,
                                     meta={"more_count": more_count, "isP": base_form["isP"]},
                                     dont_filter=True)

    @staticmethod
    def _load_bk_markets():
        """Return the 'bk' section of conf/hgg070.json (the market definitions)."""
        package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        with open(os.path.join(package_dir, "conf", "hgg070.json"), encoding='utf8') as conf_file:
            return json.load(conf_file)['bk']

    @staticmethod
    def _odds_rows(game, markets, match_id, league_id):
        """Build the odds rows of one game for every market switched on with 'Y'.

        Each config entry yields one row per configured condition key
        (``ratio_name`` first, then ``latio``), or a single row with an empty
        condition when neither key is configured.
        """
        rows = []
        for market in markets:
            p_code = game[market['prodds']]
            if p_code != 'Y':
                continue
            for entry in market['items']:
                odds_code = game[entry['rodds']]
                # NOTE(review): odds are always read from "ior_OUH" whatever
                # the entry is - kept as in the original, confirm intended.
                odds = game["ior_OUH"]
                prefix = market["plodds"] + entry["lodds"] + '0'
                odds_only = prefix + str(odds) + "hg3535" + str(match_id)
                sole = prefix + str(match_id) + "hg3535"

                conditions = []
                if entry['ratio_name']:
                    conditions.append(game[entry['ratio_name']])
                if entry['latio']:
                    conditions.append(game[entry['latio']])
                if not conditions:
                    conditions.append('')

                for condition in conditions:
                    rows.append({"match_id": match_id, "lg_id": league_id, "odds_code": odds_code,
                                 "status": 0, "sort": 0, "p_code": p_code,
                                 "odds": odds, "condition": condition, "odds_only": odds_only,
                                 "sole": sole, "source": "hgg070", "type": 0, "team": ""})
        return rows

    def getItem(self, response):
        """Yield one LanqiuItem for every game marked open (``gopen == 'Y'``)."""
        more_count = response.meta["more_count"]
        isP = response.meta["isP"]
        # NOTE(review): this is a selector list, not the element text - kept
        # as in the original; confirm what the pipeline expects.
        showtype = response.xpath('//serverresponse/showtype')

        payload = xmltodict.parse(response.text).get('serverresponse') or {}
        games = payload.get('game') or []
        if isinstance(games, dict):
            # xmltodict returns a single mapping, not a list, when the
            # response contains exactly one <game> element.
            games = [games]
        open_games = [g for g in games if isinstance(g, dict) and g.get('gopen') == 'Y']
        if not open_games:
            return

        # Read the market configuration once per response, not once per game.
        markets = self._load_bk_markets()
        for game in open_games:
            # Insert a space in front of the last 8 characters of the
            # raw datetime string.
            match_time = game['datetime'][:-8] + " " + game['datetime'][-8:]
            team_h = game['team_h']
            team_c = game['team_c']
            league_id = game['gidm']
            match_id = game.get('gid', '')
            match_uid = Helper.genearte_uuid(team_h + team_c + match_time)

            item = LanqiuItem()
            item['match_id'] = match_id
            # NOTE(review): the rows in 'content' use source "hgg070" while
            # the item uses "hg0088" - kept as in the original.
            item['source'] = "hg0088"
            item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            item['content'] = self._odds_rows(game, markets, match_id, league_id)
            item['league_id'] = league_id
            item['more_count'] = more_count
            item['league'] = game["league"]
            item['match_identity'] = match_uid
            item['datetime'] = match_time
            item['team_h'] = team_h
            item['team_c'] = team_c
            item['isP'] = isP
            item['showtype'] = showtype
            yield item
|