| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- # -*- coding: utf-8 -*-
- import scrapy
- import re
class LanqiuSpider(scrapy.Spider):
    """Spider that queries m.hgg070.com for basketball leagues and their games.

    Flow:
      1. ``start_requests`` posts one league-list query per show type.
      2. ``parse`` keeps the leagues whose name matches ``remath`` and posts
         a game-list query for each of them.
      3. ``detailball`` receives the game-list responses.

    The form submitted with each request is carried in ``meta["data"]`` so
    the next callback can reuse it.
    """

    name = 'lanqiu'
    # Entries must be bare host names: Scrapy's offsite filtering compares
    # them with the request host, so a trailing "/" would never match.
    allowed_domains = ['m.hgg070.com']
    start_urls = ['http://m.hgg070.com//']
    # Pattern used to pick leagues by name ("篮球" is Chinese for "basketball").
    remath = re.compile("篮球")
    # Session identifier sent with every form. Kept as a class attribute so it
    # can be replaced without editing the code (e.g. ``scrapy crawl lanqiu
    # -a uid=...``), since spider arguments are set as instance attributes.
    # NOTE(review): presumably this token expires — confirm how it is renewed.
    uid = '7554a670e92d06105fe567b75e5b80fe65e6e40167f4979c8d74ca5eaa461d4d'
    # Value of the ``date`` form field for each supported show type:
    # 'FT' = today, 'FU' = early market.
    _DATE_BY_SHOWTYPE = {'FT': '', 'FU': 'all'}
    # Disabled pipeline configuration, kept from the original for reference:
    # custom_settings={
    #     "ITEM_PIPELINES": {
    #         "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
    #     },
    # }

    def start_requests(self):
        """Yield one league-list form request per show type.

        Each request stores the form it submitted under ``meta["data"]`` and
        is handled by ``parse``.
        """
        # Show types to query: today ('FT') and early market ('FU').
        show_types = ['FT', 'FU']
        # Content-Length is intentionally not set here: it depends on the
        # encoded form body, so a fixed value is only right for one exact
        # payload. The HTTP layer derives it from the body.
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36',
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in show_types:
            data = {
                'uid': self.uid,
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': '',
            }
            yield scrapy.FormRequest(
                url=url,
                formdata=data,
                callback=self.parse,
                headers=headers,
                meta={"data": data},
                dont_filter=True,
            )

    def parse(self, response):
        """Yield a game-list request for every matching league in the response.

        Reads the form submitted for this response from ``meta["data"]``,
        selects the ``<league>`` elements whose ``league_name`` matches
        ``remath``, and posts the same form extended with the league id to
        the game-list endpoint. Leagues without a name or id are skipped.
        """
        base_form = response.meta["data"]
        showtype = base_form["showtype"]
        date_filter = self._DATE_BY_SHOWTYPE.get(showtype)
        if date_filter is None:
            # Without a known show type no league id would be attached to the
            # form, so there is nothing meaningful to request.
            self.logger.warning("Unsupported showtype %r; skipping response", showtype)
            return

        url = "http://m.hgg070.com/app/member/get_game_list.php"
        for league in response.xpath('//league'):
            league_name = league.xpath('./league_name/text()').extract_first()
            # extract_first() returns None when the node is missing.
            if not league_name or not self.remath.search(league_name):
                continue
            lid = league.xpath('./league_id/text()').extract_first()
            if not lid:
                self.logger.warning("League %r has no league_id; skipping", league_name)
                continue
            # Build a fresh form per league instead of mutating the dict that
            # is shared through response.meta.
            form = dict(base_form, lid=lid, sorttype='league', date=date_filter)
            self.logger.debug("Requesting game list with form data: %s", form)
            yield scrapy.FormRequest(
                url=url,
                formdata=form,
                callback=self.detailball,
                # Same meta layout as start_requests, so callbacks can read
                # response.meta["data"] uniformly.
                meta={"data": form},
                dont_filter=True,
            )

    def detailball(self, response):
        """Log the raw body of a game-list response; no parsing is done yet."""
        self.logger.debug("Game list response: %s", response.text)
|