# Odin / hgg070
# -*- coding: utf-8 -*-
import scrapy
import re
import copy
class LanqiuSpider(scrapy.Spider):
    """Spider that walks the basketball listings of m.hgg070.com.

    Every step is a form POST; the request chain is:

    1. ``start_requests`` -> ``get_league_list.php``, once per show type.
    2. ``parse`` -> ``get_game_list.php`` for each league whose name
       matches ``remath``.
    3. ``detailball`` -> ``get_game_more.php`` for each game id found.
    4. ``getItem`` receives the per-game response.
    """

    name = 'lanqiu'
    # Host names only: an entry with a trailing slash never matches a
    # request's host, so the offsite filter would reject everything that is
    # not sent with dont_filter=True.
    allowed_domains = ['m.hgg070.com']
    start_urls = ['http://m.hgg070.com//']
    # Leagues are followed only if their name contains this text
    # ("basketball").
    remath = re.compile("篮球")
    # Show types requested by start_requests, mapped to the ``date`` form
    # value that parse sends for them: FT = today, FU = early market.
    _DATE_BY_SHOWTYPE = {'FT': '', 'FU': 'all'}
    # custom_settings={
    #     "ITEM_PIPELINES": {
    #         "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
    #     },
    # }

    def start_requests(self):
        """Yield one league-list form POST per show type.

        The submitted form is also stored in ``meta["data"]`` so that
        ``parse`` can reuse it as the base of the follow-up requests.
        """
        # Content-Length is intentionally not set here: the body length
        # depends on the form values and is computed when the request is sent.
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in self._DATE_BY_SHOWTYPE:
            data = {
                'uid': '9b9f0dea34ee4e5225990433370a88fea2b37b79c98e266b338152c619a71d2b',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': ''
            }
            yield scrapy.FormRequest(
                url=url,
                formdata=data,
                callback=self.parse,
                headers=headers,
                meta={"data": data},
                dont_filter=True,
            )

    def parse(self, response):
        """Yield a game-list request for every league matching ``remath``.

        ``response.meta["data"]`` is the form that produced this response.
        The game-list form is that base form plus ``lid``, ``sorttype`` and
        ``date``; the *unmodified* base form is what gets forwarded to
        ``detailball`` through ``meta``.
        """
        base_form = response.meta["data"]
        showtype = base_form["showtype"]
        url = "http://m.hgg070.com/app/member/get_game_list.php"
        for league in response.xpath('//league'):
            league_name = league.xpath('./league_name/text()').extract_first()
            # extract_first() returns None for a league without a name node;
            # such entries cannot match and are skipped instead of crashing.
            if not league_name or not self.remath.search(league_name):
                continue
            lid = league.xpath('./league_id/text()').extract_first()
            if lid is None:
                # A None form value cannot be url-encoded.
                self.logger.warning("League %r has no league_id; skipped", league_name)
                continue
            # Per-request copy so no form dict is shared between requests.
            form = dict(base_form)
            if showtype in self._DATE_BY_SHOWTYPE:
                form.update(
                    lid=lid,
                    sorttype='league',
                    date=self._DATE_BY_SHOWTYPE[showtype],
                )
            # NOTE(review): the forwarded form carries no lid, as before;
            # confirm get_game_more.php does not need it.
            yield scrapy.FormRequest(
                url=url,
                formdata=form,
                callback=self.detailball,
                meta={"data": copy.deepcopy(base_form)},
                dont_filter=True,
            )

    def detailball(self, response):
        """Yield one ``get_game_more.php`` request per ``<game>`` with a gid.

        The form sent is ``response.meta["data"]`` plus the game's ``gid``.
        """
        base_form = response.meta["data"]
        url = "http://m.hgg070.com/app/member/get_game_more.php"
        for game in response.xpath("//game"):
            gid = game.xpath("./gid/text()").extract_first()
            if gid is None:
                # A None form value cannot be url-encoded.
                self.logger.warning("Game entry without gid; skipped")
                continue
            # Build a fresh dict instead of mutating the one held in meta.
            form = dict(base_form, gid=gid)
            self.logger.debug("Requesting game detail with form %s", form)
            yield scrapy.FormRequest(
                url=url,
                formdata=form,
                callback=self.getItem,
                dont_filter=True,
            )

    def getItem(self, response):
        """Log the body of a game-detail response (no item is built yet)."""
        self.logger.debug("Game detail response: %s", response.text)