Эх сурвалжийг харах

Merge remote-tracking branch 'origin/master'

# Conflicts:
#	hgg070_spider/items.py
#	hgg070_spider/main.py
#	hgg070_spider/settings.py
#	hgg070_spider/spiders/lanqiu.py
#	hgg070_spider/utils/helper.py
Your Name 6 жил өмнө
parent
commit
2cfec050f4

BIN
hgg070_spider/__pycache__/items.cpython-37.pyc


BIN
hgg070_spider/__pycache__/settings.cpython-37.pyc


+ 9 - 0
hgg070_spider/items.py

@@ -12,3 +12,12 @@ class ZuqiuItem(scrapy.Field):
     data = scrapy.Field()
     data = scrapy.Field()
     index = scrapy.Field()
     index = scrapy.Field()
     tag = scrapy.Field()
     tag = scrapy.Field()
+
+
+class LanqiuItem(scrapy.Item):
+    """Basketball game record passed from the lanqiu spider to its pipeline.
+
+    Derives from ``scrapy.Item`` (``scrapy.Field`` is only the per-field
+    metadata container) so that exactly the fields declared below can be set.
+    """
+    id = scrapy.Field()        # filled from the <gid> element
+    league = scrapy.Field()    # filled from the <league> element
+    team_h = scrapy.Field()    # filled from the <team_h> element
+    team_c = scrapy.Field()    # filled from the <team_c> element
+    showtype = scrapy.Field()  # filled from the <gtype> element
+    datetime = scrapy.Field()  # filled from the <datetime> element

+ 1 - 0
hgg070_spider/main.py

@@ -6,6 +6,7 @@ from scrapy.cmdline import execute
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 execute(["scrapy", "crawl", "zuqiu"])
 execute(["scrapy", "crawl", "zuqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
+# NOTE(review): scrapy.cmdline.execute() normally ends by exiting the process, so while the
+# "zuqiu" crawl above is still uncommented this call is presumably never reached — confirm,
+# and keep only one execute() line active at a time.
+execute(["scrapy", "crawl", "lq_sports"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "wangqiu"])
 # execute(["scrapy", "crawl", "wangqiu"])
 # execute(["scrapy", "crawl", "wqbodan"])
 # execute(["scrapy", "crawl", "wqbodan"])

+ 57 - 0
hgg070_spider/pipelines/lanqiu.py

@@ -0,0 +1,57 @@
+import logging
+from twisted.internet import defer,reactor
+from ..utils.helper import Helper
+from ..settings import LEAGUE_URL,MATCH_URL
+class ZuqiuPipeline(object):
+    """Item pipeline that submits a basketball league and its match to the backend.
+
+    The class keeps its name because the lanqiu spider enables it through the
+    dotted path ``hgg070_spider.pipelines.lanqiu.ZuqiuPipeline``.
+    """
+
+    @defer.inlineCallbacks
+    def process_item(self,item,spider):
+        """Run the blocking upload in a reactor thread, then hand the item on.
+
+        Args:
+            item: scraped item; ``league``, ``showtype``, ``datetime`` and
+                ``id`` are read from it.
+            spider: the spider that produced the item (not used here).
+
+        Returns:
+            A Deferred that fires with ``item`` after the upload attempt, or
+            fails with the exception raised during the upload.
+        """
+        logger=logging.getLogger(__name__)
+        logger.info("进入管道")
+        out=defer.Deferred()
+        reactor.callInThread(self._do_calculation,item,out)
+        # _do_calculation fires ``out`` on every path; otherwise this yield
+        # would never resume and the item would stay in the pipeline forever.
+        yield out
+        # A pipeline has to return the item so that processing can continue.
+        return item
+
+    def _do_calculation(self,item,out):
+        """Thread entry point: upload ``item`` and report the outcome via ``out``.
+
+        Args:
+            item: the item being processed.
+            out: Deferred awaited by ``process_item``; it is fired from the
+                reactor thread with ``item`` on success or with the raised
+                exception on failure.
+        """
+        try:
+            self._submit(item)
+        except Exception as exc:
+            # Deferreds must be fired from the reactor thread, not from here.
+            reactor.callFromThread(out.errback,exc)
+        else:
+            reactor.callFromThread(out.callback,item)
+
+    def _submit(self,item):
+        """Post the league first and, once it is accepted, the match."""
+        league_name = item['league']
+        uuid = Helper.genearte_uuid(league_name)
+        showtype=item['showtype']
+        is_rollball,is_today,is_morningplate = 0,0,0
+        # NOTE(review): the spiders request the show types 'FT' and 'FU'; confirm
+        # that the empty string really is the early-market marker expected here.
+        if showtype=="FT":
+            is_today=1
+        elif showtype=="":
+            is_morningplate=1
+        else:
+            is_rollball=1
+
+        league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid","is_rollball","is_today","is_morningplate"]
+        league_value = [league_name, "1", "1", "0", item['datetime'], item['id'], "hgg070", uuid,is_rollball,is_today,is_morningplate]
+        # match payload
+        childer = dict(zip(league_key, league_value))
+        # league payload wraps the same record
+        obj = {"game_code": "lq", "title": "league", "source": "hgg070","data":[childer]}
+        res=Helper.async_post(LEAGUE_URL,obj)
+        if not res or res.get('status')!=1:
+            logging.warning("联赛提交失败,{}".format(res))
+            return
+        logging.warning("联赛提交成功,{}".format(res))
+        # submit the match only after the league was accepted
+        lres=Helper.async_post(MATCH_URL,childer)
+        # guard against an empty response before reading its status, and
+        # report the match result (not the league result) in the log
+        if lres and lres.get('status')==1:
+            logging.warning("赛事提交成功,{}".format(lres))
+        else:
+            logging.warning("赛事提交失败,{}".format(lres))
+
+
+
+
+
+
+
+
+

+ 7 - 7
hgg070_spider/settings.py

@@ -96,13 +96,13 @@ M_DB = 'kaiyou'
 M_PASSWORD = 'kaiyou'
 M_PASSWORD = 'kaiyou'
 # M_PASSWORD = '123456'
 # M_PASSWORD = '123456'
 
 
-# LEAGUE_URL = 'http://stadmin.bocai108.com:19093/setLeague'
-LEAGUE_URL = 'http://admin.5gogo.com/setLeague'
-# MATCH_URL = 'http://stadmin.bocai108.com:19093/setMatch'
-MATCH_URL = 'http://admin.5gogo.com/setMatch'
-# ODDS_URL = 'http://stadmin.bocai108.com:19093/setOdds'
-ODDS_URL = 'http://admin.5gogo.com/setOdds'
-TOKEN_URL = "http://stadmin.bocai108.com/getToken"
+LEAGUE_URL = 'http://admin.5gogo.com/setLeague'  #联赛
+# LEAGUE_URL = 'http://stadmin.bocai108.com/setLeague'
+MATCH_URL = 'http://admin.5gogo.com/setMatch'  #赛事
+# MATCH_URL = 'http://stadmin.bocai108.com/setMatch'
+ODDS_URL = 'http://admin.5gogo.com/setOdds'  #赔率
+# ODDS_URL = 'http://stadmin.bocai108.com/setOdds'
+TOKEN_URL = "http://admin.5gogo.com/getToken"  #Token
 MATCH_RESULT = "http://stadmin.bocai108.com:19093/setMatchResult"
 MATCH_RESULT = "http://stadmin.bocai108.com:19093/setMatchResult"
 MATCH_STATUS = "http://stadmin.bocai108.com:19093/upMatch"
 MATCH_STATUS = "http://stadmin.bocai108.com:19093/upMatch"
 ODDSCH = "http://stadmin.bocai108.com:19093/setOddsCH"
 ODDSCH = "http://stadmin.bocai108.com:19093/setOddsCH"

BIN
hgg070_spider/spiders/__pycache__/lanqiu.cpython-37.pyc


BIN
hgg070_spider/spiders/__pycache__/zuqiu.cpython-37.pyc


+ 22 - 13
hgg070_spider/spiders/lanqiu.py

@@ -1,18 +1,18 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 import scrapy
 import scrapy
 import re
 import re
-
-
+import copy
+from ..items import LanqiuItem
 class LanqiuSpider(scrapy.Spider):
 class LanqiuSpider(scrapy.Spider):
     name = 'lanqiu'
     name = 'lanqiu'
     allowed_domains = ['m.hgg070.com/']
     allowed_domains = ['m.hgg070.com/']
     start_urls = ['http://m.hgg070.com//']
     start_urls = ['http://m.hgg070.com//']
     remath=re.compile("篮球")
     remath=re.compile("篮球")
-    # custom_settings={
-    #     "ITEM_PIPELINES": {
-    #         "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
-    #     },
-    # }
+    custom_settings={
+        "ITEM_PIPELINES": {
+            "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
+        },
+    }
     def start_requests(self):
     def start_requests(self):
         #今日,早盘
         #今日,早盘
         h_types=[('FT'),('FU')]
         h_types=[('FT'),('FU')]
@@ -33,7 +33,7 @@ class LanqiuSpider(scrapy.Spider):
         for item in h_types:
         for item in h_types:
             showtype = item
             showtype = item
             data={
             data={
-                'uid': '7554a670e92d06105fe567b75e5b80fe65e6e40167f4979c8d74ca5eaa461d4d',
+                'uid': '257853bc6f4166ca4e84f4d75d1cfc3540c6eab54b34898f4ad405cb2412402f',
                 'langx': 'zh-cn',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'ltype': '3',
                 'gtype': 'BK',
                 'gtype': 'BK',
@@ -48,6 +48,7 @@ class LanqiuSpider(scrapy.Spider):
     def parse(self, response):
     def parse(self, response):
         #获取id并判断抓取的球型
         #获取id并判断抓取的球型
         data=response.meta["data"]
         data=response.meta["data"]
+        fromdata=copy.deepcopy(data)
         league=response.xpath('//league')
         league=response.xpath('//league')
         url="http://m.hgg070.com/app/member/get_game_list.php"
         url="http://m.hgg070.com/app/member/get_game_list.php"
         for le in league:
         for le in league:
@@ -60,23 +61,31 @@ class LanqiuSpider(scrapy.Spider):
                 # 抓取早盘
                 # 抓取早盘
                 elif data["showtype"]=="FU":
                 elif data["showtype"]=="FU":
                     data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
                     data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
-                print('77777777777777777777777777777777777777777777L',data)
-                yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":response.meta["data"]},dont_filter=True)
+                yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
 
 
     def detailball(self,response):
     def detailball(self,response):
         data=response.meta["data"]
         data=response.meta["data"]
-        url=""
+        url="http://m.hgg070.com/app/member/get_game_more.php"
         #获取联赛id gid
         #获取联赛id gid
         game=response.xpath("//game")
         game=response.xpath("//game")
         for g in game:
         for g in game:
             gid=g.xpath("./gid/text()").extract_first()
             gid=g.xpath("./gid/text()").extract_first()
             data["gid"]=gid
             data["gid"]=gid
-            print('wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww',data)
             yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,dont_filter=True)
             yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,dont_filter=True)
 
 
 
 
     def getItem(self,response):
     def getItem(self,response):
-        print('ffffffffffffffffffffffffffffffffffffffffffffffffffffff',response.text)
+        data=response.xpath("//game")
+        for item in data:
+            obj=LanqiuItem()
+            obj['id']=item.xpath("./gid/text()").extract_first()
+            obj['league'] = item.xpath("./league/text()").extract_first()
+            obj['team_h'] = item.xpath("./team_h/text()").extract_first()
+            obj['team_c'] = item.xpath("./team_c/text()").extract_first()
+            obj['showtype'] = item.xpath("./gtype/text()").extract_first()
+            obj['datetime'] = item.xpath("./datetime/text()").extract_first()
+            yield obj
+
 
 
 
 
 
 

+ 119 - 0
hgg070_spider/spiders/lq_sports.py

@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from ..items import LanqiuItem
+import copy
+import lxml.etree
+import re
+
+class LqSportsSpider(scrapy.Spider):
+    """Spider for leagues whose name contains "篮球" on m.hgg070.com.
+
+    Request chain: league list -> game list of each matching league ->
+    detail page of each game, from which the odds fields are yielded.
+    """
+    name = 'lq_sports'
+    # Host name only: an entry with a trailing slash can never equal a host.
+    allowed_domains = ['m.hgg070.com']
+    start_urls = ['http://m.hgg070.com//']
+    remath = re.compile("篮球")
+    # The upload pipeline is currently switched off for this spider:
+    # custom_settings={
+    #     "ITEM_PIPELINES": {
+    #         "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
+    #     },
+    # }
+    def start_requests(self):
+        """Request the league list once for each show type (today, early market)."""
+        h_types=['FT','FU']
+        # NOTE(review): 'Content-Length' is hard-coded; confirm it still matches
+        # the encoded form body or drop it and let the downloader compute it.
+        headers = {
+            'Accept': '*/*',
+            'Accept-Encoding': 'gzip, deflate',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Connection': 'keep-alive',
+            'Content-Length': '130',
+            'Content-type': 'application/x-www-form-urlencoded',
+            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
+            'Host': 'm.hgg070.com',
+            'Origin': 'http://m.hgg070.com',
+            'Referer': 'http://m.hgg070.com/',
+            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
+        }
+        url = "http://m.hgg070.com/app/member/get_league_list.php"
+        for showtype in h_types:
+            data={
+                'uid': '86797ef15d547a503c926bb658051dd137586e25f0936536d424c09f4fb74d83',
+                'langx': 'zh-cn',
+                'ltype': '3',
+                'gtype': 'BK',
+                'showtype': showtype,
+                'sorttype': '',
+                'date': '',
+                'isP': ''
+            }
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.parse,headers=headers,
+                                      meta={"data":data}, dont_filter=True)
+
+    def parse(self, response):
+        """Queue a game-list request for every league whose name matches ``remath``.
+
+        The form data of the league-list request is read from
+        ``response.meta["data"]``; an unmodified copy of it travels on to
+        ``detailball`` in the request meta.
+        """
+        data=response.meta["data"]
+        fromdata=copy.deepcopy(data)
+        url="http://m.hgg070.com/app/member/get_game_list.php"
+        for le in response.xpath('//league'):
+            name=le.xpath('./league_name/text()').extract_first()
+            # extract_first() gives None when the league has no name element
+            if not name or not self.remath.search(name):
+                continue
+            lid = le.xpath('./league_id/text()').extract_first()
+            # today market
+            if data["showtype"]=="FT":
+                data['lid'],data['sorttype'],data['date']=lid,'league',''
+            # early market
+            elif data["showtype"]=="FU":
+                data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
+
+    def detailball(self,response):
+        """Request the detail page of every game listed in the response."""
+        data=response.meta["data"]
+        url="http://m.hgg070.com/app/member/get_game_more.php"
+        for g in response.xpath("//game"):
+            gid=g.xpath("./gid/text()").extract_first()
+            # Fresh form per game: the dict in meta is shared by all responses
+            # of the same show type, so it is not modified in place.
+            formdata=dict(data,gid=gid)
+            yield scrapy.FormRequest(url=url,formdata=formdata,callback=self.getItem,dont_filter=True)
+
+
+    def getItem(self,response):
+        """Yield one dict of odds fields for every game whose market is open.
+
+        Each child element of an open ``<game>`` becomes one entry: tag name ->
+        text with spaces removed, or "" when the element has no text. Games
+        whose own ``<gopen>`` is not 'Y' are logged and skipped.
+        """
+        for game in response.xpath("//game"):
+            # Relative path: '//game/gopen' would read the first game of the
+            # whole document no matter which game is being processed.
+            gopen = game.xpath('./gopen/text()').extract_first()
+            if gopen != 'Y':
+                self.logger.info('gopen == N, 详细赔率盘口未开启')
+                continue
+            node = lxml.etree.fromstring(game.extract())
+            game_odds = {}
+            for child in node:
+                game_odds[child.tag] = "" if child.text is None else child.text.replace(' ', '')
+            yield game_odds

+ 1 - 1
hgg070_spider/spiders/zuqiu.py

@@ -4,7 +4,7 @@ import lxml
 
 
 import scrapy
 import scrapy
 
 
-from items import ZuqiuItem
+from ..items import ZuqiuItem
 
 
 
 
 class ZuqiuSpider(scrapy.Spider):
 class ZuqiuSpider(scrapy.Spider):