1
0

3 Revīzijas eda9360313 ... 2cfec050f4

Autors SHA1 Ziņojums Datums
  Your Name 2cfec050f4 Merge remote-tracking branch 'origin/master' 6 gadi atpakaļ
  Your Name 4f6742ca9c 添加联赛,赛事提交 6 gadi atpakaļ
  Your Name 2071693702 更新 6 gadi atpakaļ

+ 3 - 1
hgg070_spider/items.py

@@ -9,7 +9,9 @@ import scrapy
 
 
 class ZuqiuItem(scrapy.Field):
-    all = scrapy.Field()
+    # Container yielded by ZuqiuSpider.parse_odds and consumed by ZuqiuPipeline.process_item.
+    # NOTE(review): the base class is scrapy.Field, not scrapy.Item - confirm this is intentional.
+    # data: the game_odds dict assembled in parse_odds.
+    data = scrapy.Field()
+    # index: position of the originating request type in the spider's h_types list.
+    index = scrapy.Field()
+    # tag: the more_count text read from the game list response for this match.
+    tag = scrapy.Field()
 
 
 class LanqiuItem(scrapy.Field):

+ 1 - 0
hgg070_spider/main.py

@@ -4,6 +4,7 @@ from scrapy.cmdline import execute
 
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+# NOTE(review): scrapy.cmdline.execute() appears to end the process (sys.exit) once the crawl
+# finishes, so only the first uncommented execute(...) below would ever run - confirm, and keep
+# a single active line (or switch to CrawlerProcess) if both spiders are meant to run.
+execute(["scrapy", "crawl", "zuqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
 execute(["scrapy", "crawl", "lq_sports"])
 # execute(["scrapy", "crawl", "guanjun"])

+ 86 - 11
hgg070_spider/pipelines/zuqiu.py

@@ -1,12 +1,18 @@
-
-
-# from twisted.internet import defer,reactor
-# from ..utils.helper import Helper
+import datetime
+import time
+import logging
+import pymongo
+from twisted.internet import defer, reactor
+from ..utils.helper import Helper
+from ..settings import M_HOST, M_USER, M_PASSWORD, M_POST, M_DB, LEAGUE_URL, ODDS_URL, MATCH_URL
 
 
 class ZuqiuPipeline(object):
     def open_spider(self, spider):
-        pass
+        """Open the MongoDB connection used by process_item.
+
+        Called by Scrapy when the spider starts. The client is built from the
+        M_* settings and the database handle is kept on ``self.db``.
+        """
+        self.mongo = pymongo.MongoClient(host=M_HOST, username=M_USER, password=M_PASSWORD, port=M_POST,
+                                         authSource=M_DB)
+        self.db = self.mongo[M_DB]
+
+    def close_spider(self, spider):
+        """Close the MongoDB connection opened in open_spider.
+
+        Called by Scrapy when the spider finishes; without it the client's
+        sockets stay open until the process exits.
+        """
+        client = getattr(self, 'mongo', None)
+        if client is not None:
+            client.close()
+
     # @defer.inlineCallbacks
     # def process_item(self,item,spider):
     #     out=defer.Deferred()
@@ -17,9 +23,78 @@ class ZuqiuPipeline(object):
     #     pass
 
     def process_item(self, item, spider):
-        all = item['all']
-        team_h, team_c = all['team_h'], all['team_c']
-        league, league_id = all['league'], all['league']
-        datetime, re_time = all['datetime'], all['re_time']
-        match_id = all['gid']
-        print(league, team_h, team_c, datetime, match_id, league_id)
+        """Submit the league and the match carried by *item* to the remote API.
+
+        Reads ``team_h``, ``team_c``, ``league``, ``gidm``, ``datetime`` and ``gid``
+        from ``item['data']``; ``item['index']`` selects the market flag and
+        ``item['tag']`` is forwarded as the match ``tag``.
+
+        A league / match is POSTed only when the MongoDB lookup finds no matching
+        document, and it is stored in MongoDB only when the API answers with
+        ``status == 1``.
+
+        Returns:
+            The unchanged item, so that any later pipeline still receives it.
+        """
+        logger = logging.getLogger(__name__)
+        match_all = item['data']
+        pt = str(item['index'])
+        team_h, team_c = match_all['team_h'], match_all['team_c']
+        league_name, league_id = match_all['league'], match_all['gidm']
+        us_time = match_all['datetime']
+        match_id = match_all['gid']
+        tag_number = item['tag']
+        uuid = Helper.genearte_uuid(league_name)
+        last_time = '{}-12-31 23:59:59'.format(datetime.datetime.now().year)
+        match_date, match_time, _ = Helper.change_time(us_time)
+
+        # NOTE(review): existence is checked in zq_league35 but accepted leagues are written to
+        # zq_league070 - confirm the two collection names are meant to differ.
+        # find_one / insert_one replace Cursor.count() / Collection.insert(), which PyMongo 4 removed.
+        if self.db.zq_league35.find_one({'lg_id': league_id}) is None:
+            league_data = {
+                "name_chinese": league_name, "kind": "1", "match_mode": "1", "if_stop": "0",
+                "last_time": last_time, "lg_id": league_id, "source": "hg3535", "uuid": uuid,
+            }
+            league_dict = {"game_code": "zq", "title": "league", "source": "hg3535", "data": [league_data]}
+            res = Helper.async_post(LEAGUE_URL, league_dict)
+            if not res:
+                logger.warning('足球联赛接口异常, {}'.format(res))
+            elif res.get('status') == 1:
+                self.db.zq_league070.insert_one(league_data)
+                logger.info('足球联赛提交, {}'.format(res))
+            else:
+                # Mirrors the match branch below: a rejected submission is reported, not ignored.
+                logger.warning('足球联赛提交失败, {}'.format(res))
+        else:
+            logger.info('{},联赛已存在, 不提交'.format(league_name))
+
+        # One table drives both the flag written to the document and the flag used in the
+        # duplicate lookup. Index 0 sets is_today, index 2 sets is_morningplate and every other
+        # index sets is_stringscene - the values the previous if/elif chain stored. The previous
+        # lookup table disagreed with that chain for indexes 1, 2 and 3, so the duplicate check
+        # queried a flag the stored document never carried.
+        pt_status = {'0': 'is_today', '2': 'is_morningplate'}.get(pt, 'is_stringscene')
+        flags = {'is_today': 0, 'is_morningplate': 0, 'is_stringscene': 0, 'is_rollball': 0}
+        flags[pt_status] = 1
+
+        match_identity = Helper.genearte_uuid(team_h + team_c + match_date)
+        # NOTE(review): same 35-vs-070 collection split as for leagues - confirm.
+        if self.db.zq_competition35.find_one({'match_id': match_id, pt_status: 1}) is None:
+            match_data = {
+                "home_team": team_h, "guest_team": team_c, "lg_id": league_id, "status": 0,
+                "match_id": match_id, "match_date": match_date, "match_time": match_time,
+                "tag": tag_number, "source": "hg3535", "is_rollball": flags['is_rollball'],
+                "is_morningplate": flags['is_morningplate'], "is_stringscene": flags['is_stringscene'],
+                "us_time": us_time, "uuid": uuid, "half_match_id": 0, "is_today": flags['is_today'],
+                "is_horn": 0, "match_identity": match_identity,
+            }
+            match_dict = {"game_code": "zq", "title": "match", "source": "hg3535", "data": [match_data]}
+            res = Helper.async_post(MATCH_URL, match_dict)
+            if not res:
+                logger.warning('足球赛事接口异常提交失败, {}'.format(res))
+            elif res.get('status') == 1:
+                self.db.zq_competition070.insert_one(match_data)
+                logger.info('足球赛事提交, {}'.format(res))
+            else:
+                logger.warning('足球赛事表提交失败, {}'.format(res))
+        else:
+            logger.info('足球赛事已存在,不提交')
+        # Scrapy hands the return value to the next pipeline; returning nothing would pass None on.
+        return item

+ 41 - 35
hgg070_spider/spiders/zuqiu.py

@@ -35,12 +35,11 @@ class ZuqiuSpider(scrapy.Spider):
     def start_requests(self):
         url = "http://m.hgg070.com/app/member/get_league_list.php"
         h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
-        for h_type in h_types:
-            # show_type, isp, length = h_type
-            show_type, isp, length = h_types[3]
+        for i, h_type in enumerate(h_types):
+            show_type, isp, length = h_type
             self.headers['Content-Length'] = length
             from_data = {
-                'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
+                'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'gtype': 'FT',
@@ -50,33 +49,32 @@ class ZuqiuSpider(scrapy.Spider):
                 'isP': isp
             }
             yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
-                                     meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
+                                     meta={'index': i}, dont_filter=True)
 
     def parse(self, response):
-        if response.status == 400:
-            print(response.status)
-            print('parse', response.url)
         leagues = response.xpath('//serverresponse/game/league')
         url = 'http://m.hgg070.com/app/member/get_game_list.php'
         if leagues:
-            showtype = response.meta['showtype']
-            isp = response.meta['isp']
-            if showtype == 'FT' and isp == '':
+            index = response.meta['index']
+            if index == 0:
                 date = ''
+                showtype = 'FT'
+                isp = ''
                 self.headers['Content-Length'] = '147'
-            elif showtype == 'FU' and isp == 'P':
-                date = 'all'
-                self.headers['Content-Length'] = '151'
-            elif showtype == 'FU' and isp == '':
+            elif index == 2:
                 date = 'all'
+                showtype = 'FU'
+                isp = ''
                 self.headers['Content-Length'] = '150'
             else:
                 date = 'all'
+                showtype = 'FU'
+                isp = 'P'
                 self.headers['Content-Length'] = '151'
             for league in leagues:
                 lid = league.xpath('.//league_id/text()').extract_first()
                 from_data = {
-                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
+                    'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -87,35 +85,40 @@ class ZuqiuSpider(scrapy.Spider):
                     'isP': isp
                 }
                 yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
-                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
+                                         meta={'index': index}, dont_filter=True)
         else:
             print('未获取到联赛id')
             return
 
     def parse_match(self, response):
-        if response.status == 400:
-            print(response.status)
-            print('parse_match', response.url)
+        index = response.meta['index']
         url = 'http://m.hgg070.com/app/member/get_game_more.php'
-        showtype = response.meta['showtype']
-        isp = response.meta['isp']
-        if showtype == 'FT' and isp == '':
+        if index == 0:
             date = ''
+            showtype = 'FT'
+            isp = ''
             self.headers['Content-Length'] = '132'
-        elif showtype == 'FU' and isp == 'P':
+        elif index == 1:
             date = 'all'
+            showtype = 'FT'
+            isp = 'P'
             self.headers['Content-Length'] = '136'
-        elif showtype == 'FU' and isp == '':
+        elif index == 2:
             date = ''
+            showtype = 'FU'
+            isp = ''
             self.headers['Content-Length'] = '132'
         else:
             date = 'all'
+            showtype = 'FU'
+            isp = 'P'
             self.headers['Content-Length'] = '136'
         gids = response.xpath('//serverresponse/game/gid/text()').extract()
+        tags = response.xpath('//serverresponse/game/more_count/text()').extract()
         if gids:
-            for gid in gids:
+            for i, gid in enumerate(gids):
                 from_data = {
-                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
+                    'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -124,16 +127,17 @@ class ZuqiuSpider(scrapy.Spider):
                     'isP': isp,
                     'gid': gid,
                 }
+                tag = tags[i]
                 yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
-                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
+                                         meta={'index': index, 'tag': tag}, dont_filter=True)
 
     def parse_odds(self, response):
-        # print(response.text)
-        # game_lists = []
-        if response.status == 400:
-            print(response.status)
-            print('parse_odds', response.url)
-        game = response.xpath('//serverresponse/game')[0]
+        index = response.meta['index']
+        tag = response.meta['tag']
+        try:
+            game = response.xpath('//serverresponse/game')[0]
+        except:
+            return
         logger = logging.getLogger(__name__)
         if game:
             game_odds = {}
@@ -148,5 +152,7 @@ class ZuqiuSpider(scrapy.Spider):
             else:
                 logger.info('gopen == "N", 详细赔率盘口未开启')
             item = ZuqiuItem()
-            item['all'] = game_odds
+            item['data'] = game_odds
+            item['index'] = index
+            item['tag'] = tag
             yield item

+ 10 - 0
hgg070_spider/utils/helper.py

@@ -1,5 +1,7 @@
 import hashlib
 import json
+import time
+
 from requests_futures.sessions import FuturesSession
 from .langconv import *
 from .LocalToken import token
@@ -46,3 +48,11 @@ class Helper(object):
         hl = hashlib.md5()
         hl.update(line.encode(encoding='utf-8'))
         return hl.hexdigest()
+
+    @staticmethod
+    def change_time(ctime):
+        """Shift a ``'%Y-%m-%d %H:%M:%S'`` timestamp forward by 12 hours.
+
+        Args:
+            ctime: timestamp string in the form ``YYYY-MM-DD HH:MM:SS``.
+
+        Returns:
+            A ``(match_date, match_time, full)`` tuple of strings: the shifted
+            date, the shifted time, and both joined by a single space.
+
+        Raises:
+            ValueError: if *ctime* does not match the expected format.
+        """
+        # Imported locally so the helper does not depend on the module's import block.
+        from datetime import datetime, timedelta
+
+        # Naive datetime arithmetic keeps the shift at exactly 43200 seconds; going through
+        # time.mktime()/time.localtime() made the result depend on the host's timezone and DST rules.
+        shifted = datetime.strptime(ctime, '%Y-%m-%d %H:%M:%S') + timedelta(seconds=43200)
+        match_date = shifted.strftime('%Y-%m-%d')
+        match_time = shifted.strftime('%H:%M:%S')
+        return match_date, match_time, '{} {}'.format(match_date, match_time)