Browse Source

Merge remote-tracking branch 'origin/master'

Your Name 6 years ago
parent
commit
cb4fec1ad8

+ 4 - 0
hgg070_spider/main.py

@@ -5,7 +5,11 @@ from scrapy.cmdline import execute
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 # execute(["scrapy", "crawl", "zuqiu"])
+execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "lq_sports"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "wangqiu"])

+ 5 - 0
hgg070_spider/pipelines/lq_sports.py → hgg070_spider/pipelines/aaaaa.py

@@ -7,9 +7,14 @@ import time
 
 class LqSportsPipeline(object):
     @defer.inlineCallbacks
+    def process_item(self, item, spider):
+        logger = logging.getLogger(__name__)
         logger.info("进入管道")
         out = defer.Deferred()
         reactor.callInThread(self._do_calculation, item, out)

+ 46 - 8
hgg070_spider/pipelines/lanqiu.py

@@ -1,17 +1,14 @@
 import logging
 from twisted.internet import defer, reactor
 from ..utils.helper import Helper
-from ..settings import LEAGUE_URL, MATCH_URL
-import pymongo
-from ..settings import M_HOST, M_USER, M_PASSWORD, M_POST, M_DB
-
-
-class ZuqiuPipeline(object):
+from ..settings import LEAGUE_URL,MATCH_URL
+import pymongo,time
+from ..settings import M_HOST,M_USER,M_PASSWORD,M_POST,M_DB,ODDS_URL
+class LanqiuPipeline(object):
     def open_spider(self, spider):
         self.mongo = pymongo.MongoClient(host=M_HOST, username=M_USER, password=M_PASSWORD, port=M_POST,
                                          authSource=M_DB)
         self.db = self.mongo[M_DB]
-
     @defer.inlineCallbacks
     def process_item(self, item, spider):
         logger = logging.getLogger(__name__)
@@ -33,6 +30,45 @@ class ZuqiuPipeline(object):
         elif type == "RB":
             is_rollball = 1
         else:
+            is_stringscene=1
+        league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid","is_rollball","is_today","is_morningplate","is_stringscene"]
+        league_value = [league_name, "1", "1", "0", item['datetime'], item['match_id'], "hgg070", uuid,is_rollball,is_today,is_morningplate,is_stringscene]
+        #赛事
+        childer = dict(zip(league_key, league_value))
+        #联赛
+        obj = {"game_code": "lq", "title": "league", "source": "hgg070","data":[childer]}
+        res=Helper.async_post(LEAGUE_URL,obj)
+        if res:
+            if res.get('status')==1:
+                logging.warning("联赛提交成功,{}1".format(res))
+                #提交赛事
+                lres=Helper.async_post(MATCH_URL,childer)
+                if lres.get('status')==1:
+                    logging.warning("赛事提交成功,{}2".format(res))
+                    #保存赔率
+                    # 是否串场
+                    if item['isP'] == 'P':
+                        ris_stringscene = 1
+                    else:
+                        ris_stringscene = 0
+                    # 现在时间,时间戳
+                    utime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                    odds_key = ["game_code", "title", "match_id", "lg_id", "data", "source", "odds_only", "tag", "uuid",
+                                "is_stringscene", "utime", "pt", 'match_identity']
+                    odds_value = ["lq", "odds", item['match_id'], item['league_id'], item["content"], "hgg070", [],
+                                  item['more_count'], uuid,
+                                  ris_stringscene, utime, item['isP'], item["match_identity"]]
+                    # 赛事
+                    odderlist = dict(zip(odds_key, odds_value))
+                    res = Helper.async_post(ODDS_URL, odderlist)
+                    if res:
+                        if res.get('status') == 1:
+                            logging.warning("赔率提交成功,{}3".format(res))
+                        else:
+                            logging.warning("赔率提交失败,{}4".format(res))
+                    else:
+                        logging.warning("赔率提交失败,{}5".format(res))
+
             is_stringscene = 1
         league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid",
                       "is_rollball", "is_today", "is_morningplate", "is_stringscene"]
@@ -51,10 +87,12 @@ class ZuqiuPipeline(object):
                 lres = Helper.async_post(MATCH_URL, childer)
                 if lres.get('status') == 1:
                     logging.warning("联赛提交成功,{}".format(res))
+
                 else:
-                    logging.warning("赛提交失败,{}".format(res))
+                    logging.warning("赛提交失败,{}".format(res))
 
             else:
                 logging.warning("联赛提交失败,{}".format(res))
         else:
             logging.warning("联赛提交失败,{}".format(res))
+

BIN
hgg070_spider/spiders/__pycache__/lanqiu.cpython-37.pyc


+ 94 - 0
hgg070_spider/spiders/aaaa.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+import scrapy
+import re
+import copy
+from ..items import LanqiuItem
class LanqiuSpider(scrapy.Spider):
    """Crawl basketball ("篮球") leagues, games and odds from m.hgg070.com.

    Flow:
        start_requests -> POST league list for each show type
        parse          -> keep only basketball leagues, request game lists
        detailball     -> request per-game detail ("more") endpoint
        getItem        -> yield one LanqiuItem per <game> node
    """

    name = 'lanqiu'
    # FIX: no trailing slash — allowed_domains entries must be bare host
    # names, otherwise Scrapy's offsite middleware never matches them.
    allowed_domains = ['m.hgg070.com']
    start_urls = ['http://m.hgg070.com//']
    # Pre-compiled filter for basketball league names.
    remath = re.compile("篮球")
    custom_settings = {
        "ITEM_PIPELINES": {
            # FIX: pipeline class was renamed ZuqiuPipeline -> LanqiuPipeline
            # in pipelines/lanqiu.py; the old dotted path no longer resolves.
            "hgg070_spider.pipelines.lanqiu.LanqiuPipeline": 200,
        },
    }

    def start_requests(self):
        """POST the league-list endpoint once per show type.

        'FT' is today's card, 'FU' the early market (早盘).
        """
        h_types = ['FT', 'FU']
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            # NOTE(review): hard-coded Content-Length is suspicious — Scrapy
            # computes the body length itself; confirm the site requires it.
            'Content-Length': '130',
            'Content-type': 'application/x-www-form-urlencoded',
            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
            'Host': 'm.hgg070.com',
            'Origin': 'http://m.hgg070.com',
            'Referer': 'http://m.hgg070.com/',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
        }
        url = "http://m.hgg070.com/app/member/get_league_list.php"
        for showtype in h_types:
            data = {
                # presumably a session token captured from the site — expires
                'uid': '257853bc6f4166ca4e84f4d75d1cfc3540c6eab54b34898f4ad405cb2412402f',
                'langx': 'zh-cn',
                'ltype': '3',
                'gtype': 'BK',  # BK = basketball
                'showtype': showtype,
                'sorttype': '',
                'date': '',
                'isP': ''
            }
            yield scrapy.FormRequest(url=url, formdata=data, callback=self.parse,
                                     headers=headers,
                                     meta={"data": data}, dont_filter=True)

    def parse(self, response):
        """Filter basketball leagues and request each league's game list.

        The response is XML; each <league> node carries a name and an id.
        """
        data = response.meta["data"]
        # Keep a pristine copy for downstream meta: `data` is mutated below.
        fromdata = copy.deepcopy(data)
        url = "http://m.hgg070.com/app/member/get_game_list.php"
        for le in response.xpath('//league'):
            name = le.xpath('./league_name/text()').extract_first()
            # Only leagues whose name contains "篮球" (basketball).
            if not self.remath.findall(name):
                continue
            lid = le.xpath('./league_id/text()').extract_first()
            if data["showtype"] == "FT":
                # Today's card: no date filter.
                data['lid'], data['sorttype'], data['date'] = lid, 'league', ''
            elif data["showtype"] == "FU":
                # Early market: request all dates.
                data['lid'], data['sorttype'], data['date'] = lid, 'league', 'all'
            # FormRequest serializes formdata at construction time, so
            # mutating `data` on the next iteration is safe.
            yield scrapy.FormRequest(url=url, formdata=data,
                                     callback=self.detailball,
                                     meta={"data": fromdata}, dont_filter=True)

    def detailball(self, response):
        """Request the detail ("more") endpoint for every game id."""
        data = response.meta["data"]
        url = "http://m.hgg070.com/app/member/get_game_more.php"
        for g in response.xpath("//game"):
            data["gid"] = g.xpath("./gid/text()").extract_first()
            yield scrapy.FormRequest(url=url, formdata=data,
                                     callback=self.getItem, dont_filter=True)

    def getItem(self, response):
        """Yield a LanqiuItem for each <game> node in the detail response."""
        for node in response.xpath("//game"):
            obj = LanqiuItem()
            obj['id'] = node.xpath("./gid/text()").extract_first()
            obj['league'] = node.xpath("./league/text()").extract_first()
            obj['team_h'] = node.xpath("./team_h/text()").extract_first()
            obj['team_c'] = node.xpath("./team_c/text()").extract_first()
            # NOTE(review): 'showtype' is filled from <gtype>, not <showtype>
            # — looks intentional upstream but worth confirming.
            obj['showtype'] = node.xpath("./gtype/text()").extract_first()
            obj['datetime'] = node.xpath("./datetime/text()").extract_first()
            yield obj

+ 81 - 20
hgg070_spider/spiders/lanqiu.py

@@ -1,16 +1,22 @@
 # -*- coding: utf-8 -*-
 import scrapy
-import re
+from ..items import LanqiuItem
 import copy
+import lxml.etree
+import re,os,json
+from ..utils.helper import Helper
+import time
 from ..items import LanqiuItem
-class LanqiuSpider(scrapy.Spider):
-    name = 'lanqiu'
+import xmltodict
+
+class LqSportsSpider(scrapy.Spider):
+    name = 'lq_sports'
     allowed_domains = ['m.hgg070.com/']
     start_urls = ['http://m.hgg070.com//']
-    remath=re.compile("篮球")
+    remath = re.compile("篮球")
     custom_settings={
         "ITEM_PIPELINES": {
-            "hgg070_spider.pipelines.lanqiu.ZuqiuPipeline": 200,
+            "hgg070_spider.pipelines.lanqiu.LanqiuPipeline": 200,
         },
     }
     def start_requests(self):
@@ -33,7 +39,7 @@ class LanqiuSpider(scrapy.Spider):
         for item in h_types:
             showtype = item
             data={
-                'uid': '257853bc6f4166ca4e84f4d75d1cfc3540c6eab54b34898f4ad405cb2412402f',
+                'uid': 'a8b9b2facd6b19ab7023a2b8686207d4ea98c3ab68e455abe8fe49a4861ff68f',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'gtype': 'BK',
@@ -70,25 +76,80 @@ class LanqiuSpider(scrapy.Spider):
         game=response.xpath("//game")
         for g in game:
             gid=g.xpath("./gid/text()").extract_first()
+            more_count = g.xpath("./more_count/text()").extract_first()
             data["gid"]=gid
-            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,dont_filter=True)
+            yield scrapy.FormRequest(url=url,formdata=data,callback=self.getItem,meta={"more_count":more_count,"isP":data["isP"]},dont_filter=True)
 
 
     def getItem(self,response):
-        data=response.xpath("//game")
-        for item in data:
-            obj=LanqiuItem()
-            obj['id']=item.xpath("./gid/text()").extract_first()
-            obj['league'] = item.xpath("./league/text()").extract_first()
-            obj['team_h'] = item.xpath("./team_h/text()").extract_first()
-            obj['team_c'] = item.xpath("./team_c/text()").extract_first()
-            obj['showtype'] = item.xpath("./gtype/text()").extract_first()
-            obj['datetime'] = item.xpath("./datetime/text()").extract_first()
-            yield obj
-
-
-
+        more_count = response.meta["more_count"]
+        isP = response.meta["isP"]
+        showtype=response.xpath('//serverresponse/showtype')
+        data= xmltodict.parse(response.text)['serverresponse']['game']
+        game_lists=[i for i in data if i['gopen']=='Y']
 
+        if game_lists:
+            for gl in game_lists:
+                cpath=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+                with open(cpath+"/conf/hgg070.json",encoding='utf8') as hg:
+                    hgg=json.load(hg)['bk']
+                datetime = gl['datetime'][:-8] + " " + gl['datetime'][-8:]
+                team_h = gl['team_h']
+                team_c = gl['team_c']
+                league_id = gl['gidm']
+                match_id = gl.get('gid', '')
+                match_uid = Helper.genearte_uuid(team_h + team_c + datetime)
+                data = []
+                for hg in hgg:
+                    items=hg['items']
+                    if gl[hg['prodds']]=='Y':
+                        for x in items:
+                            odds_code = gl[x['rodds']]
+                            p_code = gl[hg['prodds']]
+                            odds=gl["ior_OUH"]
+                            #有两个条件,加两条数据
+                            if x['ratio_name']:      #大的
+                                condition_u=gl[x['ratio_name']]
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + str(odds) + "hg3535" + str(match_id)
+                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
+                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
+                                        "sort": 0, "p_code": p_code,
+                                        "odds": odds, "condition": condition_u, "odds_only": odds_only, "sole": sole,
+                                        "source": "hgg070", "type": 0, "team": ""}
+                                data.append(tobj)
 
+                            if x['latio']:   #小的
+                                condition_s = gl[x['latio']]
+                                odds_only =hg["plodds"] + x["lodds"] + '0' + str(odds) + "hg3535" + str(match_id)
+                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
+                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
+                                        "sort": 0, "p_code": p_code,
+                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
+                                        "source": "hgg070", "type": 0, "team": ""}
+                                data.append(tobj)
 
+                            if not x['latio'] and not x['ratio_name']:
+                                condition_s = ''
+                                odds_only = hg["plodds"] + x["lodds"] + '0' + str(odds) + "hg3535" + str(match_id)
+                                sole = hg["plodds"] + x["lodds"] + '0' + str(match_id) + "hg3535"
+                                tobj = {"match_id": match_id, "lg_id": league_id, "odds_code": odds_code, "status": 0,
+                                        "sort": 0, "p_code": p_code,
+                                        "odds": odds,"condition": condition_s, "odds_only": odds_only, "sole": sole,
+                                        "source": "hgg070", "type": 0, "team": ""}
+                                data.append(tobj)
 
+                    item = LanqiuItem()
+                    item['match_id'] = match_id
+                    item['source'] = "hg0088"
+                    item['updata'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                    item['content'] = data
+                    item['league_id'] = league_id
+                    item['more_count'] = more_count
+                    item['league'] = gl["league"]
+                    item['match_identity'] = match_uid
+                    item['datetime'] = datetime
+                    item['team_h'] = team_h
+                    item['team_c'] = team_c
+                    item['isP'] = isP
+                    item['showtype'] = showtype
+                    yield item