juan 6 years ago
parent
commit
eda9360313

BIN
hgg070_spider/__pycache__/settings.cpython-37.pyc


+ 1 - 1
hgg070_spider/main.py

@@ -4,8 +4,8 @@ from scrapy.cmdline import execute
 
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
+execute(["scrapy", "crawl", "lq_sports"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "wangqiu"])
 # execute(["scrapy", "crawl", "wqbodan"])

+ 2 - 4
hgg070_spider/pipelines/lanqiu.py

@@ -5,7 +5,6 @@ from ..settings import LEAGUE_URL,MATCH_URL
 class ZuqiuPipeline(object):
     @defer.inlineCallbacks
     def process_item(self,item,spider):
-        print('---------------------------------------------------------------------')
         logger=logging.getLogger(__name__)
         logger.info("entering the pipeline")
         out=defer.Deferred()
@@ -26,13 +25,12 @@ class ZuqiuPipeline(object):
         else:
             is_rollball=1
 
-        league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid"]
-        league_value = [league_name, "1", "1", "0", item['datetime'], item['id'], "hgg070", uuid]
+        league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid", "is_rollball", "is_today", "is_morningplate"]
+        league_value = [league_name, "1", "1", "0", item['datetime'], item['id'], "hgg070", uuid, is_rollball, is_today, is_morningplate]
         # match record
         childer = dict(zip(league_key, league_value))
         # league payload
         obj = {"game_code": "lq", "title": "league", "source": "hgg070","data":[childer]}
-        # obj={"uuid":uuid,"league_id":item['id'],"league_name":league_name}
         res=Helper.async_post(LEAGUE_URL,obj)
         if res:
             if res.get('status')==1:
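
With the three added fields, the dict built from league_key/league_value and posted to LEAGUE_URL takes roughly the shape below; a minimal sketch with hypothetical sample values (the league name, timestamp, ids, uuid and the three flags are illustrative, not taken from a real crawl).

# illustrative payload shape only; the sample values are made up
league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time",
              "lg_id", "source", "uuid", "is_rollball", "is_today", "is_morningplate"]
league_value = ["NBA", "1", "1", "0", "2019-10-26 12:00:00", "1234567",
                "hgg070", "example-uuid", 0, 1, 0]
childer = dict(zip(league_key, league_value))
obj = {"game_code": "lq", "title": "league", "source": "hgg070", "data": [childer]}
# obj["data"][0] now also carries is_rollball / is_today / is_morningplate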

+ 119 - 0
hgg070_spider/spiders/lq_sports.py

@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from ..items import LanqiuItem
+import copy
+import lxml.etree
+import re
+
+class LqSportsSpider(scrapy.Spider):
+    name = 'lq_sports'
+    allowed_domains = ['m.hgg070.com']  # domain only, no trailing slash
+    start_urls = ['http://m.hgg070.com/']
+    remath = re.compile("篮球")  # matches league names containing "basketball"
+    # custom_settings={
+    #     "ITEM_PIPELINES": {
+    #         "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
+    #     },
+    # }
+    def start_requests(self):
+        # showtype codes: FT = today's fixtures, FU = early market
+        h_types = ['FT', 'FU']
+        headers = {
+            'Accept': '*/*',
+            'Accept-Encoding': 'gzip, deflate',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Connection': 'keep-alive',
+            'Content-Length': '130',
+            'Content-type': 'application/x-www-form-urlencoded',
+            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
+            'Host': 'm.hgg070.com',
+            'Origin': 'http://m.hgg070.com',
+            'Referer': 'http://m.hgg070.com/',
+            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
+        }
+        url = "http://m.hgg070.com/app/member/get_league_list.php"
+        for item in h_types:
+            showtype = item
+            data={
+                'uid': '86797ef15d547a503c926bb658051dd137586e25f0936536d424c09f4fb74d83',
+                'langx': 'zh-cn',
+                'ltype': '3',
+                'gtype': 'BK',
+                'showtype': showtype,
+                'sorttype': '',
+                'date': '',
+                'isP': ''
+            }
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.parse,headers=headers,
+                                      meta={"data":data}, dont_filter=True)
+
+    def parse(self, response):
+        # get each league's id and keep only the basketball (篮球) leagues
+        data=response.meta["data"]
+        fromdata=copy.deepcopy(data)
+        league=response.xpath('//league')
+        url="http://m.hgg070.com/app/member/get_game_list.php"
+        for le in league:
+            name=le.xpath('./league_name/text()').extract_first()
+            if len(self.remath.findall(name))>0:
+                lid = le.xpath('./league_id/text()').extract_first()
+                # scrape today's fixtures
+                if data["showtype"]=="FT":
+                    data['lid'],data['sorttype'],data['date']=lid,'league',''
+                # scrape the early market
+                elif data["showtype"]=="FU":
+                    data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
+                yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
+
+    def detailball(self,response):
+        data=response.meta["data"]
+        url="http://m.hgg070.com/app/member/get_game_more.php"
+        # collect the gid of each game under this league
+        game=response.xpath("//game")
+        for g in game:
+            gid=g.xpath("./gid/text()").extract_first()
+            data["gid"]=gid
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,dont_filter=True)
+
+
+    def getItem(self,response):
+        game_lists = []
+        data=response.xpath("//game")
+        if data:
+            for game in data:
+                game_odds = {}
+                # './gopen' keeps the lookup relative to this <game> node;
+                # '//game/gopen' would always return the first game's value.
+                gopen = game.xpath('./gopen/text()').extract_first()
+                if gopen == 'Y':
+                    node = lxml.etree.fromstring(game.extract())
+                    for i in node.getchildren():
+                        if i.text is None:
+                            game_odds[i.tag] = ""
+                        else:
+                            game_odds[i.tag] = i.text.replace(' ', '')
+                    game_lists.append(game_odds)
+                else:
+                    print('gopen == N, detailed odds markets are not open')
+
+                # draft of the per-game item assembly, kept commented out for now:
+                # for item in data:
+                #     obj = LanqiuItem()
+                #     obj['id'] = item.xpath("./gid/text()").extract_first()
+                #     obj['league'] = item.xpath("./league/text()").extract_first()
+                #     obj['team_h'] = item.xpath("./team_h/text()").extract_first()
+                #     obj['team_c'] = item.xpath("./team_c/text()").extract_first()
+                #     obj['showtype'] = item.xpath("./gtype/text()").extract_first()
+                #     obj['datetime'] = item.xpath("./datetime/text()").extract_first()
+
+                #     item['match_id'] = item.xpath("./gid/text()").extract_first()
+                #     item['uuid'] = uid_list
+                #     item['source'] = "hg0088"
+                #     item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                #     item['content'] = odd_list
+                #     item['gidm'] = league_id
+                #     item['tag'] = tag
+                #     item['league'] = league
+                #     item['match_uid'] = match_uid
+                #     item['datetime'] = datetime
+                #     item['team_h'] = team_h
+                #     item['team_c'] = team_c
+                #     yield obj  # 'obj' only exists inside this commented-out draft
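
If the commented-out item assembly above is ever restored, the parsed game_odds dicts can be mapped onto LanqiuItem directly; a rough sketch, assuming LanqiuItem declares the fields referenced in the draft (id, league, team_h, team_c, showtype, datetime) and that each <game> node carries the matching gid/league/team_h/team_c/gtype/datetime tags.

# hypothetical helper, not part of this commit
def build_item(game_odds):
    obj = LanqiuItem()
    obj['id'] = game_odds.get('gid', '')
    obj['league'] = game_odds.get('league', '')
    obj['team_h'] = game_odds.get('team_h', '')
    obj['team_c'] = game_odds.get('team_c', '')
    obj['showtype'] = game_odds.get('gtype', '')
    obj['datetime'] = game_odds.get('datetime', '')
    return obj

Yielding build_item(game_odds) inside the gopen == 'Y' branch would then feed these items into the ZuqiuPipeline shown above.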