|
@@ -1,11 +1,84 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
import scrapy
|
|
import scrapy
|
|
|
-
|
|
|
|
|
|
|
+import re
|
|
|
|
|
|
|
|
class LanqiuSpider(scrapy.Spider):
    """Crawl the league and game lists for game type ``BK`` on m.hgg070.com.

    Request chain:

    1. ``start_requests`` posts to ``get_league_list.php`` once per show type.
    2. ``parse`` keeps the leagues whose name matches ``remath`` and posts to
       ``get_game_list.php`` for each of them.
    3. ``detailball`` issues one request per game id found in that list.
    4. ``getItem`` receives the final response (currently it only logs it).

    The form payload travels between callbacks as ``response.meta["data"]``.
    Every request gets its own copy of that dict so that values written for
    one league or game never leak into another request's payload.
    """

    name = 'lanqiu'
    # Bare host name only: a trailing "/" is not part of a domain and would
    # never match the host of a request during offsite filtering.
    allowed_domains = ['m.hgg070.com']
    # Not used for the initial requests because start_requests() is overridden.
    start_urls = ['http://m.hgg070.com//']
    # Selects leagues whose name contains the literal "篮球" (basketball).
    remath = re.compile("篮球")
    # Endpoint used by detailball() for the per-game request.
    # TODO: not filled in yet; while it is empty detailball() skips its
    # requests instead of failing on an invalid URL.
    detail_url = ""
    # custom_settings={
    #     "ITEM_PIPELINES": {
    #         "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
    #     },
    # }

    def start_requests(self):
        """Yield one league-list POST per show type.

        Yields:
            scrapy.FormRequest: handled by ``parse``; the form payload is
            attached as ``meta["data"]``.
        """
        # 'FT' is treated as "today" and 'FU' as "early market" in parse().
        show_types = ('FT', 'FU')
        # Content-Length is intentionally not set here: the encoded form body
        # is not a fixed size, so a hard-coded value would be wrong for most
        # requests. It is left to the HTTP client to compute.
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36',
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in show_types:
            # NOTE(review): 'uid' looks like a session token; presumably it
            # expires and has to be refreshed -- confirm.
            data = {
                'uid': '7554a670e92d06105fe567b75e5b80fe65e6e40167f4979c8d74ca5eaa461d4d',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': '',
            }
            yield scrapy.FormRequest(
                url=url,
                formdata=data,
                callback=self.parse,
                headers=headers,
                meta={"data": data},
                dont_filter=True,
            )

    def parse(self, response):
        """Pick the matching leagues and request each league's game list.

        Args:
            response: league-list response; ``response.meta["data"]`` holds
                the form payload that produced it.

        Yields:
            scrapy.FormRequest: handled by ``detailball``; carries a
            per-league copy of the payload (with ``lid`` set) in
            ``meta["data"]``.
        """
        base = response.meta["data"]
        showtype = base["showtype"]
        url = "http://m.hgg070.com/app/member/get_game_list.php"
        for le in response.xpath('//league'):
            # extract_first() returns None when the node is missing; fall back
            # to '' so the regex search cannot raise TypeError.
            name = le.xpath('./league_name/text()').extract_first() or ''
            if not self.remath.search(name):
                continue
            lid = le.xpath('./league_id/text()').extract_first()
            if lid is None:
                # A None value cannot be form-encoded; skip the league.
                self.logger.warning("league %r has no league_id; skipped", name)
                continue
            # Copy per league: the same dict must not be shared by every
            # request, otherwise later iterations overwrite earlier 'lid's.
            data = dict(base)
            if showtype == "FT":
                # Today's games.
                data['lid'], data['sorttype'], data['date'] = lid, 'league', ''
            elif showtype == "FU":
                # Early-market games.
                data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
            # Debug marker text is unchanged from the print() call it replaces.
            self.logger.debug('77777777777777777777777777777777777777777777L %s', data)
            yield scrapy.FormRequest(
                url=url,
                formdata=data,
                callback=self.detailball,
                meta={"data": data},
                dont_filter=True,
            )

    def detailball(self, response):
        """Issue one request per game id found in a league's game list.

        Args:
            response: game-list response; ``response.meta["data"]`` holds the
                per-league form payload.

        Yields:
            scrapy.FormRequest: handled by ``getItem``; its payload is the
            league payload plus ``gid``.
        """
        base = response.meta["data"]
        url = self.detail_url
        if not url:
            # scrapy.Request raises ValueError for a URL without a scheme, so
            # an empty endpoint would abort this callback on the first game.
            self.logger.warning(
                "detail_url is not configured; skipping games from %s", response.url
            )
            return
        for g in response.xpath("//game"):
            gid = g.xpath("./gid/text()").extract_first()
            if gid is None:
                # A None value cannot be form-encoded; skip the game.
                continue
            # Fresh dict per game so each request keeps its own gid.
            data = dict(base, gid=gid)
            # Debug marker text is unchanged from the print() call it replaces.
            self.logger.debug('wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww %s', data)
            yield scrapy.FormRequest(
                url=url,
                formdata=data,
                callback=self.getItem,
                dont_filter=True,
            )

    def getItem(self, response):
        """Log the body of the per-game response; no items are produced yet."""
        # Debug marker text is unchanged from the print() call it replaces.
        self.logger.debug('ffffffffffffffffffffffffffffffffffffffffffffffffffffff %s', response.text)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|