Your Name 6 years ago
Commit 4f6742ca9c

+ 3 - 1
hgg070_spider/items.py

@@ -9,4 +9,6 @@ import scrapy
 
 
 class ZuqiuItem(scrapy.Item):
-    all = scrapy.Field()
+    data = scrapy.Field()
+    index = scrapy.Field()
+    tag = scrapy.Field()

+ 86 - 12
hgg070_spider/pipelines/zuqiu.py

@@ -1,12 +1,18 @@
-
-
-# from twisted.internet import defer,reactor
-# from ..utils.helper import Helper
+import datetime
+import time
+import logging
+import pymongo
+from twisted.internet import defer, reactor
+from ..utils.helper import Helper
+from ..settings import M_HOST, M_USER, M_PASSWORD, M_POST, M_DB, LEAGUE_URL, ODDS_URL, MATCH_URL
 
 
 class ZuqiuPipeline(object):
     def open_spider(self, spider):
-        pass
+        self.mongo = pymongo.MongoClient(host=M_HOST, username=M_USER, password=M_PASSWORD, port=M_POST,
+                                         authSource=M_DB)
+        self.db = self.mongo[M_DB]
+
     # @defer.inlineCallbacks
     # def process_item(self,item,spider):
     #     out=defer.Deferred()
@@ -17,10 +23,78 @@ class ZuqiuPipeline(object):
     #     pass
 
     def process_item(self, item, spider):
-        all = item['all']
-        team_h, team_c = all['team_h'], all['team_c']
-        league, league_id = all['league'], all['league']
-        datetime, re_time = all['datetime'], all['re_time']
-        match_id = all['gid']
-        print(league, team_h, team_c, datetime, match_id, league_id)
-        return None
+        logger = logging.getLogger(__name__)
+        match_all = item['data']
+        pt = str(item['index'])
+        team_h, team_c = match_all['team_h'], match_all['team_c']
+        league_name, league_id = match_all['league'], match_all['gidm']
+        us_time, re_time = match_all['datetime'], match_all['re_time']
+        match_id = match_all['gid']
+        tag_number = item['tag']
+        uuid = Helper.genearte_uuid(league_name)
+        league_list = []
+        last_time = '{}-12-31 23:59:59'.format(datetime.datetime.now().year)
+        match_date, match_time, time3 = Helper.change_time(us_time)
+        if self.db.zq_league35.find({'lg_id': league_id}).count() < 1:
+            # if self.db.zq_league35.find({'uuid': uuid}).count() < 1:
+            league_dict = {"game_code": "zq", "title": "league", "source": "hg3535"}
+            league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid"]
+            league_value = [league_name, "1", "1", "0", last_time, league_id, "hg3535", uuid]
+            league_data = dict(zip(league_key, league_value))
+            league_list.append(league_data)
+            league_dict['data'] = league_list
+            res = Helper.async_post(LEAGUE_URL, league_dict)
+            if res:
+                if res.get('status') == 1:
+                    self.db.zq_league070.insert(league_data)
+                    logger.info('足球联赛提交, {}'.format(res))
+            else:
+                logger.warning('足球联赛接口异常, {}'.format(res))
+        else:
+            logger.info('{},联赛已存在, 不提交'.format(league_name))
+        pt_dict = {'0': 'is_today', '1': 'is_morningplate', '2': 'is_stringscene', '3': 'is_rollball'}
+        pt_status = pt_dict[pt]
+        if pt == '0':
+            is_rollball = 0
+            is_today = 1
+            is_morningplate = 0
+            is_stringscene = 0
+        elif pt == '2':
+            is_rollball = 0
+            is_today = 0
+            is_morningplate = 1
+            is_stringscene = 0
+        else:
+            is_today = 0
+            is_rollball = 0
+            is_morningplate = 0
+            is_stringscene = 1
+
+        match_list = []
+        match_identity = Helper.genearte_uuid(team_h + team_c + match_date)
+        if self.db.zq_competition35.find({'match_id': match_id, pt_status: 1}).count() < 1:
+            # if self.db.zq_competition35.find({'match_identity': match_identity, pt_status: 1}).count() < 1:
+            match_dict = {"game_code": "zq", "title": "match", "source": "hg3535"}
+            match_key = ["home_team", "guest_team", "lg_id", "status", "match_id", "match_date", "match_time",
+                         "tag", "source", "is_rollball", "is_morningplate", "is_stringscene", "us_time", "uuid",
+                         "half_match_id", "is_today", "is_horn", 'match_identity']
+            match_value = [team_h, team_c, league_id, 0, match_id, match_date, match_time, tag_number,
+                           "hg3535", is_rollball, is_morningplate, is_stringscene, us_time, uuid, 0, is_today, 0,
+                           match_identity]
+            match_data = dict(zip(match_key, match_value))
+            match_list.append(match_data)
+            match_dict['data'] = match_list
+            res = Helper.async_post(MATCH_URL, match_dict)
+            if res:
+                if res.get('status') == 1:
+                    self.db.zq_competition070.insert(match_data)
+                    logger.info('足球赛事提交, {}'.format(res))
+                else:
+                    logger.warning('足球赛事表提交失败, {}'.format(res))
+                    # logger.warning(match_dict)
+            else:
+                logger.warning('足球赛事接口异常提交失败, {}'.format(res))
+                # logger.warning(match_dict)
+        else:
+            logger.info('足球赛事已存在,不提交')
+        # reactor.callFromThread(out.callback, item)
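
A pipeline that opens a MongoClient in open_spider normally releases it in the matching close_spider hook; a minimal sketch of that counterpart, assuming no other code reuses the connection:

    def close_spider(self, spider):
        # release the MongoDB connection opened in open_spider
        self.mongo.close()

The plate-type handling above keeps two copies of the same mapping: pt_dict maps the request index to a status field name, while the if/elif chain sets the four flags by hand, and the two disagree for pt == '2' (pt_dict says is_stringscene, the branch sets is_morningplate). A table-driven sketch that derives the flags from pt_dict, assuming pt_dict's assignment is the intended one:

    # Illustrative only: assumes pt_dict ('0' today, '1' morning plate,
    # '2' string scene, '3' rolling ball) is the authoritative mapping.
    pt_dict = {'0': 'is_today', '1': 'is_morningplate',
               '2': 'is_stringscene', '3': 'is_rollball'}

    def plate_flags(pt):
        """Return all four plate flags with only the one matching pt set to 1."""
        flags = {name: 0 for name in pt_dict.values()}
        flags[pt_dict[pt]] = 1
        return flags

    # flags = plate_flags(pt)
    # is_today, is_morningplate = flags['is_today'], flags['is_morningplate']
    # is_stringscene, is_rollball = flags['is_stringscene'], flags['is_rollball']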

+ 6 - 6
hgg070_spider/settings.py

@@ -96,12 +96,12 @@ M_DB = 'kaiyou'
 M_PASSWORD = 'kaiyou'
 # M_PASSWORD = '123456'
 
-LEAGUE_URL = 'http://stadmin.bocai108.com:19093/setLeague'
-# LEAGUE_URL = 'http://stadmin.bocai108.com/setLeague'
-MATCH_URL = 'http://stadmin.bocai108.com:19093/setMatch'
-# MATCH_URL = 'http://stadmin.bocai108.com/setMatch'
-ODDS_URL = 'http://stadmin.bocai108.com:19093/setOdds'
-# ODDS_URL = 'http://stadmin.bocai108.com/setOdds'
+# LEAGUE_URL = 'http://stadmin.bocai108.com:19093/setLeague'
+LEAGUE_URL = 'http://admin.5gogo.com/setLeague'
+# MATCH_URL = 'http://stadmin.bocai108.com:19093/setMatch'
+MATCH_URL = 'http://admin.5gogo.com/setMatch'
+# ODDS_URL = 'http://stadmin.bocai108.com:19093/setOdds'
+ODDS_URL = 'http://admin.5gogo.com/setOdds'
 TOKEN_URL = "http://stadmin.bocai108.com/getToken"
 MATCH_RESULT = "http://stadmin.bocai108.com:19093/setMatchResult"
 MATCH_STATUS = "http://stadmin.bocai108.com:19093/upMatch"
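
The endpoint switch above is done by commenting one URL set out and the other in. A minimal sketch of an alternative, assuming the endpoints should be switchable without editing settings.py; ADMIN_BASE is a hypothetical environment variable, not something the project defines:

    import os

    # Hypothetical: fall back to the committed default when ADMIN_BASE is unset.
    ADMIN_BASE = os.environ.get('ADMIN_BASE', 'http://admin.5gogo.com')
    LEAGUE_URL = ADMIN_BASE + '/setLeague'
    MATCH_URL = ADMIN_BASE + '/setMatch'
    ODDS_URL = ADMIN_BASE + '/setOdds'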

+ 1 - 0
hgg070_spider/spiders/lanqiu.py

@@ -2,6 +2,7 @@
 import scrapy
 import re
 
+
 class LanqiuSpider(scrapy.Spider):
     name = 'lanqiu'
     allowed_domains = ['m.hgg070.com/']

+ 26 - 39
hgg070_spider/spiders/zuqiu.py

@@ -12,7 +12,7 @@ class ZuqiuSpider(scrapy.Spider):
     allowed_domains = ['m.hgg070.com']
     custom_settings = {
         "ITEM_PIPELINES": {
-            # "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
+            "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
         },
         # 'LOG_LEVEL': 'DEBUG',
         # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
@@ -35,12 +35,11 @@ class ZuqiuSpider(scrapy.Spider):
     def start_requests(self):
         url = "http://m.hgg070.com/app/member/get_league_list.php"
         h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
-        # for h_type in h_types:
         for i, h_type in enumerate(h_types):
             show_type, isp, length = h_type
             self.headers['Content-Length'] = length
             from_data = {
-                'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
+                'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'gtype': 'FT',
@@ -50,28 +49,32 @@ class ZuqiuSpider(scrapy.Spider):
                 'isP': isp
             }
             yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
-                                     meta={'showtype': show_type, 'isp': isp, 'index': i}, dont_filter=True)
+                                     meta={'index': i}, dont_filter=True)
 
     def parse(self, response):
         leagues = response.xpath('//serverresponse/game/league')
         url = 'http://m.hgg070.com/app/member/get_game_list.php'
         if leagues:
-            showtype = response.meta['showtype']
-            isp = response.meta['isp']
             index = response.meta['index']
             if index == 0:
                 date = ''
+                showtype = 'FT'
+                isp = ''
                 self.headers['Content-Length'] = '147'
             elif index == 2:
                 date = 'all'
+                showtype = 'FU'
+                isp = ''
                 self.headers['Content-Length'] = '150'
             else:
                 date = 'all'
+                showtype = 'FU'
+                isp = 'P'
                 self.headers['Content-Length'] = '151'
             for league in leagues:
                 lid = league.xpath('.//league_id/text()').extract_first()
                 from_data = {
-                    'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
+                    'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -82,20 +85,13 @@ class ZuqiuSpider(scrapy.Spider):
                     'isP': isp
                 }
                 yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
-                                         meta={'showtype': showtype, 'isp': isp, 'index': index}, dont_filter=True)
+                                         meta={'index': index}, dont_filter=True)
         else:
             print('未获取到联赛id')
             return
 
     def parse_match(self, response):
-        # showtype = response.meta['showtype']
-        # isp = response.meta['isp']
         index = response.meta['index']
-        if response.status == 400:
-            print(response.status)
-            # print(showtype, isp)
-            print('parse_odds', response.url)
-        pass
         url = 'http://m.hgg070.com/app/member/get_game_more.php'
         if index == 0:
             date = ''
@@ -117,25 +113,12 @@ class ZuqiuSpider(scrapy.Spider):
             showtype = 'FU'
             isp = 'P'
             self.headers['Content-Length'] = '136'
-    #     showtype = response.meta['showtype']
-    #     isp = response.meta['isp']
-    #     if showtype == 'FT' and isp == '':
-    #         date = ''
-    #         self.headers['Content-Length'] = '132'
-    #     elif showtype == 'FU' and isp == 'P':
-    #         date = 'all'
-    #         self.headers['Content-Length'] = '136'
-    #     elif showtype == 'FU' and isp == '':
-    #         date = ''
-    #         self.headers['Content-Length'] = '132'
-    #     else:
-    #         date = 'all'
-    #         self.headers['Content-Length'] = '136'
         gids = response.xpath('//serverresponse/game/gid/text()').extract()
+        tags = response.xpath('//serverresponse/game/more_count/text()').extract()
         if gids:
-            for gid in gids:
+            for i, gid in enumerate(gids):
                 from_data = {
-                    'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
+                    'uid': '4d6e7f8af34715653b6039ca9b43737f096ed82446e3d37e033349aba0e3e753',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -144,14 +127,17 @@ class ZuqiuSpider(scrapy.Spider):
                     'isP': isp,
                     'gid': gid,
                 }
+                tag = tags[i]
                 yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
-                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
+                                         meta={'index': index, 'tag': tag}, dont_filter=True)
 
     def parse_odds(self, response):
-        if response.status == 400:
-            print(response.status)
-            print('parse_odds', response.url)
-        game = response.xpath('//serverresponse/game')[0]
+        index = response.meta['index']
+        tag = response.meta['tag']
+        try:
+            game = response.xpath('//serverresponse/game')[0]
+        except IndexError:
+            return
         logger = logging.getLogger(__name__)
         if game:
             game_odds = {}
@@ -164,8 +150,9 @@ class ZuqiuSpider(scrapy.Spider):
                     else:
                         game_odds[i.tag] = i.text
             else:
-                pass
-                # logger.info('gopen == "N", 详细赔率盘口未开启')
+                logger.info('gopen == "N", 详细赔率盘口未开启')
             item = ZuqiuItem()
-            item['all'] = game_odds
+            item['data'] = game_odds
+            item['index'] = index
+            item['tag'] = tag
             yield item

+ 1 - 1
hgg070_spider/utils/LocalToken.py

@@ -1 +1 @@
-token = {'token': 'mVUm5Y15717377705daed0aad4869', 'username': 'python', 'password': 'python888', 'token_url': 'http://stadmin.bocai108.com/getToken'}
+token = {'token': 'JlUo1415650051585d481566c10c9', 'username': 'python', 'password': 'python888', 'token_url': 'http://admin.5gogo.com/getToken'}

+ 10 - 0
hgg070_spider/utils/helper.py

@@ -1,5 +1,7 @@
 import hashlib
 import json
+import time
+
 from requests_futures.sessions import FuturesSession
 from .langconv import *
 from .LocalToken import token
@@ -46,3 +48,11 @@ class Helper(object):
         hl = hashlib.md5()
         hl.update(line.encode(encoding='utf-8'))
         return hl.hexdigest()
+
+    @staticmethod
+    def change_time(ctime):
+        time1 = time.mktime(time.strptime(ctime, '%Y-%m-%d %H:%M:%S')) + 43200
+        time2 = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time1))
+        match_date = time2.split(" ")[0]
+        match_time = time2.split(" ")[1]
+        return match_date, match_time, time2
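
Usage sketch for the new Helper.change_time: the input string is shifted forward by 43200 seconds (12 hours) and returned as a (date, time, full timestamp) triple. The timestamp below is illustrative only, the absolute import assumes the project root is on PYTHONPATH, and results near a DST transition in the local timezone could differ by an hour.

    from hgg070_spider.utils.helper import Helper

    match_date, match_time, shifted = Helper.change_time('2019-11-01 20:30:00')
    # match_date == '2019-11-02'
    # match_time == '08:30:00'
    # shifted    == '2019-11-02 08:30:00'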