juan 6 лет назад
Родитель
Commit
d00ae966ee

+ 2 - 2
hgg070_spider/main.py

@@ -5,8 +5,8 @@ from scrapy.cmdline import execute
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 # execute(["scrapy", "crawl", "zuqiu"])
-# execute(["scrapy", "crawl", "lanqiu"])
-execute(["scrapy", "crawl", "lq_sports"])
+execute(["scrapy", "crawl", "lanqiu"])
+# execute(["scrapy", "crawl", "lq_sports"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "wangqiu"])
 # execute(["scrapy", "crawl", "wqbodan"])

+ 0 - 1
hgg070_spider/pipelines/lq_sports.py → hgg070_spider/pipelines/aaaaa.py

@@ -6,7 +6,6 @@ import time
 class LqSportsPipeline(object):
     @defer.inlineCallbacks
     def process_item(self,item,spider):
-        print('555555555555555555555555555555555555555555555555555555555555555555555')
         logger=logging.getLogger(__name__)
         logger.info("进入管道")
         out=defer.Deferred()

+ 33 - 10
hgg070_spider/pipelines/lanqiu.py

@@ -2,9 +2,9 @@ import logging
 from twisted.internet import defer,reactor
 from ..utils.helper import Helper
 from ..settings import LEAGUE_URL,MATCH_URL
-import pymongo
-from ..settings import M_HOST,M_USER,M_PASSWORD,M_POST,M_DB
-class ZuqiuPipeline(object):
+import pymongo,time
+from ..settings import M_HOST,M_USER,M_PASSWORD,M_POST,M_DB,ODDS_URL
+class LanqiuPipeline(object):
     def open_spider(self, spider):
         self.mongo = pymongo.MongoClient(host=M_HOST, username=M_USER, password=M_PASSWORD, port=M_POST,
                                          authSource=M_DB)
@@ -34,27 +34,50 @@ class ZuqiuPipeline(object):
         else:
             is_stringscene=1
         league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid","is_rollball","is_today","is_morningplate","is_stringscene"]
-        league_value = [league_name, "1", "1", "0", item['datetime'], item['id'], "hgg070", uuid,is_rollball,is_today,is_morningplate,is_stringscene]
+        league_value = [league_name, "1", "1", "0", item['datetime'], item['match_id'], "hgg070", uuid,is_rollball,is_today,is_morningplate,is_stringscene]
         #赛事
         childer = dict(zip(league_key, league_value))
         #联赛
         obj = {"game_code": "lq", "title": "league", "source": "hgg070","data":[childer]}
         res=Helper.async_post(LEAGUE_URL,obj)
-
         if res:
             if res.get('status')==1:
-                logging.warning("联赛提交成功,{}".format(res))
+                logging.warning("联赛提交成功,{}1".format(res))
                 #提交赛事
                 lres=Helper.async_post(MATCH_URL,childer)
+                print('333333333333333333333333333333333333333333',lres)
                 if lres.get('status')==1:
-                    logging.warning("联赛提交成功,{}".format(res))
+                    logging.warning("赛事提交成功,{}2".format(res))
+                    #保存赔率
+                    # 是否串场
+                    if item['isP'] == 'P':
+                        ris_stringscene = 1
+                    else:
+                        ris_stringscene = 0
+                    # 现在时间,时间戳
+                    utime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                    odds_key = ["game_code", "title", "match_id", "lg_id", "data", "source", "odds_only", "tag", "uuid",
+                                "is_stringscene", "utime", "pt", 'match_identity']
+                    odds_value = ["lq", "odds", item['match_id'], item['league_id'], item["content"], "hgg070", [],
+                                  item['more_count'], uuid,
+                                  ris_stringscene, utime, item['isP'], item["match_identity"]]
+                    # 赛事
+                    odderlist = dict(zip(odds_key, odds_value))
+                    res = Helper.async_post(ODDS_URL, odderlist)
+                    if res:
+                        if res.get('status') == 1:
+                            logging.warning("赔率提交成功,{}3".format(res))
+                        else:
+                            logging.warning("赔率提交失败,{}4".format(res))
+                    else:
+                        logging.warning("赔率提交失败,{}5".format(res))
                 else:
-                    logging.warning("联赛提交失败,{}".format(res))
+                    logging.warning("赛提交失败,{}6".format(res))
 
             else:
-                logging.warning("联赛提交失败,{}".format(res))
+                logging.warning("联赛提交失败,{}7".format(res))
         else:
-            logging.warning("联赛提交失败,{}".format(res))
+            logging.warning("联赛提交失败,{}8".format(res))
 
 
 

BIN
hgg070_spider/spiders/__pycache__/lanqiu.cpython-37.pyc


+ 94 - 0
hgg070_spider/spiders/aaaa.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+import scrapy
+import re
+import copy
+from ..items import LanqiuItem
+class LanqiuSpider(scrapy.Spider):
+    name = 'lanqiu'
+    allowed_domains = ['m.hgg070.com/']
+    start_urls = ['http://m.hgg070.com//']
+    remath=re.compile("篮球")
+    custom_settings={
+        "ITEM_PIPELINES": {
+            "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
+        },
+    }
+    def start_requests(self):
+        #今日,早盘
+        h_types=[('FT'),('FU')]
+        headers = {
+            'Accept': '*/*',
+            'Accept-Encoding': 'gzip, deflate',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Connection': 'keep-alive',
+            'Content-Length': '130',
+            'Content-type': 'application/x-www-form-urlencoded',
+            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
+            'Host': 'm.hgg070.com',
+            'Origin': 'http://m.hgg070.com',
+            'Referer': 'http://m.hgg070.com/',
+            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
+        }
+        url = "http://m.hgg070.com/app/member/get_league_list.php"
+        for item in h_types:
+            showtype = item
+            data={
+                'uid': '257853bc6f4166ca4e84f4d75d1cfc3540c6eab54b34898f4ad405cb2412402f',
+                'langx': 'zh-cn',
+                'ltype': '3',
+                'gtype': 'BK',
+                'showtype': showtype,
+                'sorttype': '',
+                'date': '',
+                'isP': ''
+            }
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.parse,headers=headers,
+                                      meta={"data":data}, dont_filter=True)
+
+    def parse(self, response):
+        #获取id并判断抓取的球型
+        data=response.meta["data"]
+        fromdata=copy.deepcopy(data)
+        league=response.xpath('//league')
+        url="http://m.hgg070.com/app/member/get_game_list.php"
+        for le in league:
+            name=le.xpath('./league_name/text()').extract_first()
+            if len(self.remath.findall(name))>0:
+                lid = le.xpath('./league_id/text()').extract_first()
+                # 抓取今日
+                if data["showtype"]=="FT":
+                    data['lid'],data['sorttype'],data['date']=lid,'league',''
+                # 抓取早盘
+                elif data["showtype"]=="FU":
+                    data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
+                yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
+
+    def detailball(self,response):
+        data=response.meta["data"]
+        url="http://m.hgg070.com/app/member/get_game_more.php"
+        #获取联赛id gid
+        game=response.xpath("//game")
+        for g in game:
+            gid=g.xpath("./gid/text()").extract_first()
+            data["gid"]=gid
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,dont_filter=True)
+
+
+    def getItem(self,response):
+        data=response.xpath("//game")
+        for item in data:
+            obj=LanqiuItem()
+            obj['id']=item.xpath("./gid/text()").extract_first()
+            obj['league'] = item.xpath("./league/text()").extract_first()
+            obj['team_h'] = item.xpath("./team_h/text()").extract_first()
+            obj['team_c'] = item.xpath("./team_c/text()").extract_first()
+            obj['showtype'] = item.xpath("./gtype/text()").extract_first()
+            obj['datetime'] = item.xpath("./datetime/text()").extract_first()
+            yield obj
+
+
+
+
+
+
+

+ 81 - 20
hgg070_spider/spiders/lanqiu.py

@@ -1,16 +1,22 @@
 # -*- coding: utf-8 -*-
 import scrapy
-import re
+from ..items import LanqiuItem
 import copy
+import lxml.etree
+import re,os,json
+from ..utils.helper import Helper
+import time
 from ..items import LanqiuItem
-class LanqiuSpider(scrapy.Spider):
-    name = 'lanqiu'
+import xmltodict
+
+class LqSportsSpider(scrapy.Spider):
+    name = 'lq_sports'
     allowed_domains = ['m.hgg070.com/']
     start_urls = ['http://m.hgg070.com//']
-    remath=re.compile("篮球")
+    remath = re.compile("篮球")
     custom_settings={
         "ITEM_PIPELINES": {
-            "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
+            "hgg070_spider.pipelines.lanqiu.LanqiuPipeline": 200,
         },
     }
     def start_requests(self):
@@ -33,7 +39,7 @@ class LanqiuSpider(scrapy.Spider):
         for item in h_types:
             showtype = item
             data={
-                'uid': '257853bc6f4166ca4e84f4d75d1cfc3540c6eab54b34898f4ad405cb2412402f',
+                'uid': 'a8b9b2facd6b19ab7023a2b8686207d4ea98c3ab68e455abe8fe49a4861ff68f',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'gtype': 'BK',
@@ -70,25 +76,80 @@ class LanqiuSpider(scrapy.Spider):
         game=response.xpath("//game")
         for g in game:
             gid=g.xpath("./gid/text()").extract_first()
+            more_count = g.xpath("./more_count/text()").extract_first()
             data["gid"]=gid
-            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,dont_filter=True)
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,meta={"more_count":more_count,"isP":data["isP"]},dont_filter=True)
 
 
     def getItem(self,response):
-        data=response.xpath("//game")
-        for item in data:
-            obj=LanqiuItem()
-            obj['id']=item.xpath("./gid/text()").extract_first()
-            obj['league'] = item.xpath("./league/text()").extract_first()
-            obj['team_h'] = item.xpath("./team_h/text()").extract_first()
-            obj['team_c'] = item.xpath("./team_c/text()").extract_first()
-            obj['showtype'] = item.xpath("./gtype/text()").extract_first()
-            obj['datetime'] = item.xpath("./datetime/text()").extract_first()
-            yield obj
-
-
-
+        more_count = response.meta["more_count"]
+        isP = response.meta["isP"]
+        showtype=response.xpath('//serverresponse/showtype')
+        data= xmltodict.parse(response.text)['serverresponse']['game']
+        game_lists=[i for i in data if i['gopen']=='Y']
 
+        if game_lists:
+            for gl in game_lists:
+                cpath=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+                with open(cpath+"/conf/hgg070.json",encoding='utf8') as hg:
+                    hgg=json.load(hg)['bk']
+                datetime = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
+                team_h = gl['team_h']
+                team_c = gl['team_c']
+                league_id = gl['gidm']
+                match_id = gl.get('gid', '')
+                match_uid = Helper.genearte_uuid(team_h + team_c + datetime)
+                data = []
+                for hg in hgg:
+                    items=hg['items']
+                    if gl[hg['prodds']]=='Y':
+                        for x in items:
+                            odds_code = gl[x['rodds']]
+                            p_code = gl[hg['prodds']]
+                            odds=gl["ior_OUH"]
+                            #有两个条件,加两条数据
+                            if x['ratio_name']:      #大的
+                                condition_u=gl[x['ratio_name']]
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + str(odds) + "hg3535" + str(match_id)
+                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
+                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
+                                        "sort": 0, "p_code": p_code,
+                                        "odds": odds, "condition": condition_u, "odds_only": odds_only, "sole": sole,
+                                        "source": "hgg070", "type": 0, "team": ""}
+                                data.append(tobj)
 
+                            if x['latio']:   #小的
+                                condition_s = gl[x['latio']]
+                                odds_only =hg["plodds"] + x["lodds"] + '0' + str(odds) + "hg3535" + str(match_id)
+                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
+                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
+                                        "sort": 0, "p_code": p_code,
+                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
+                                        "source": "hgg070", "type": 0, "team": ""}
+                                data.append(tobj)
 
+                            if not x['latio'] and not x['ratio_name']:
+                                condition_s = ''
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + str(odds) + "hg3535" + str(match_id)
+                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
+                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
+                                        "sort": 0, "p_code": p_code,
+                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
+                                        "source": "hgg070", "type": 0, "team": ""}
+                                data.append(tobj)
 
+                    item = LanqiuItem()
+                    item['match_id'] = match_id
+                    item['source'] = "hg0088"
+                    item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                    item['content'] = data
+                    item['league_id'] = league_id
+                    item['more_count'] = more_count
+                    item['league'] = gl["league"]
+                    item['match_identity'] = match_uid
+                    item['datetime'] = datetime
+                    item['team_h'] = team_h
+                    item['team_c'] = team_c
+                    item['isP'] = isP
+                    item['showtype'] = showtype
+                    yield item

+ 0 - 154
hgg070_spider/spiders/lq_sports.py

@@ -1,154 +0,0 @@
-# -*- coding: utf-8 -*-
-import scrapy
-from ..items import LanqiuItem
-import copy
-import lxml.etree
-import re,os,json
-from ..utils.helper import Helper
-import time
-from ..items import LanqiuItem
-import xmltodict
-
-class LqSportsSpider(scrapy.Spider):
-    name = 'lq_sports'
-    allowed_domains = ['m.hgg070.com/']
-    start_urls = ['http://m.hgg070.com//']
-    remath = re.compile("篮球")
-    # custom_settings={
-    #     "ITEM_PIPELINES": {
-    #         "hgg070_spider.pipelines.lq_sports.LqSportsPipeline": 200,
-    #     },
-    # }
-    def start_requests(self):
-        #今日,早盘
-        h_types=[('FT'),('FU')]
-        headers = {
-            'Accept': '*/*',
-            'Accept-Encoding': 'gzip, deflate',
-            'Accept-Language': 'zh-CN,zh;q=0.9',
-            'Connection': 'keep-alive',
-            'Content-Length': '130',
-            'Content-type': 'application/x-www-form-urlencoded',
-            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
-            'Host': 'm.hgg070.com',
-            'Origin': 'http://m.hgg070.com',
-            'Referer': 'http://m.hgg070.com/',
-            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
-        }
-        url = "http://m.hgg070.com/app/member/get_league_list.php"
-        for item in h_types:
-            showtype = item
-            data={
-                'uid': '3970335d20df9b8ceca8673ae9b6ea910c912492f595c0ef163623ae0ea883b6',
-                'langx': 'zh-cn',
-                'ltype': '3',
-                'gtype': 'BK',
-                'showtype': showtype,
-                'sorttype': '',
-                'date': '',
-                'isP': ''
-            }
-            yield scrapy.FormRequest(url=url,formdata=data,callback=self.parse,headers=headers,
-                                      meta={"data":data}, dont_filter=True)
-
-    def parse(self, response):
-        #获取id并判断抓取的球型
-        data=response.meta["data"]
-        fromdata=copy.deepcopy(data)
-        league=response.xpath('//league')
-        url="http://m.hgg070.com/app/member/get_game_list.php"
-        for le in league:
-            name=le.xpath('./league_name/text()').extract_first()
-            if len(self.remath.findall(name))>0:
-                lid = le.xpath('./league_id/text()').extract_first()
-                # 抓取今日
-                if data["showtype"]=="FT":
-                    data['lid'],data['sorttype'],data['date']=lid,'league',''
-                # 抓取早盘
-                elif data["showtype"]=="FU":
-                    data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
-                yield scrapy.FormRequest(url=url,formdata=data,callback=self.detailball,meta={"data":fromdata},dont_filter=True)
-
-    def detailball(self,response):
-        data=response.meta["data"]
-        url="http://m.hgg070.com/app/member/get_game_more.php"
-        #获取联赛id gid
-        game=response.xpath("//game")
-        for g in game:
-            gid=g.xpath("./gid/text()").extract_first()
-            more_count = g.xpath("./more_count/text()").extract_first()
-            data["gid"]=gid
-            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,meta={"more_count":more_count,"isP":data["isP"]},dont_filter=True)
-
-
-    def getItem(self,response):
-        more_count = response.meta["more_count"]
-        isP = response.meta["isP"]
-        data= xmltodict.parse(response.text)['serverresponse']['game']
-        game_lists=[i for i in data if i['gopen']=='Y']
-
-        if game_lists:
-            for gl in game_lists:
-                cpath=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-                with open(cpath+"/conf/hgg070.json",encoding='utf8') as hg:
-                    hgg=json.load(hg)['bk']
-                datetime = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
-                team_h = gl['team_h']
-                team_c = gl['team_c']
-                league_id = gl['gidm']
-                match_id = gl.get('gid', '')
-                match_uid = Helper.genearte_uuid(team_h + team_c + datetime)
-                data = []
-                for hg in hgg:
-                    items=hg['items']
-                    if gl[hg['prodds']]=='Y':
-                        for x in items:
-                            odds_code = gl[x['rodds']]
-                            p_code = gl[hg['prodds']]
-                            odds=gl["ior_OUH"]
-                            #有两个条件,加两条数据
-                            if x['ratio_name']:      #大的
-                                condition_u=gl[x['ratio_name']]
-                                odds_only = hg["plodds"] + x["lodds"] + '0' + condition_u + str(odds) + "hg3535" + str(match_id)
-                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
-                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
-                                        "sort": 0, "p_code": p_code,
-                                        "odds": odds, "condition": condition_u, "odds_only": odds_only, "sole": sole,
-                                        "source": "hgg070", "type": 0, "team": ""}
-                                data.append(tobj)
-
-                            if x['latio']:   #小的
-                                condition_s = gl[x['latio']]
-                                odds_only =hg["plodds"] + x["lodds"] + '0' +condition_s + str(odds) + "hg3535" + str(match_id)
-                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
-                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
-                                        "sort": 0, "p_code": p_code,
-                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
-                                        "source": "hgg070", "type": 0, "team": ""}
-                                data.append(tobj)
-
-                            if not x['latio'] and not x['ratio_name']:
-                                condition_s = ''
-                                odds_only = hg["plodds"] + x["lodds"] + '0' +condition_s + str(odds) + "hg3535" + str(match_id)
-                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
-                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
-                                        "sort": 0, "p_code": p_code,
-                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
-                                        "source": "hgg070", "type": 0, "team": ""}
-                                data.append(tobj)
-
-                    item = LanqiuItem()
-                    item['match_id'] = match_id
-                    item['source'] = "hg0088"
-                    item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-                    item['content'] = data
-                    item['league_id'] = league_id
-                    item['more_count'] = more_count
-                    item['league'] = gl["league"]
-                    item['match_identity'] = match_uid
-                    item['datetime'] = datetime
-                    item['team_h'] = team_h
-                    item['team_c'] = team_c
-                    item['isP'] = isP
-                    print('wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww',item)
-                    yield item