Your Name hace 6 años
padre
commit
2071693702

+ 1 - 1
hgg070_spider/main.py

@@ -4,7 +4,7 @@ from scrapy.cmdline import execute
 
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-execute(["scrapy", "crawl", "lanqiu"])
+execute(["scrapy", "crawl", "zuqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "wangqiu"])

+ 1 - 0
hgg070_spider/pipelines/zuqiu.py

@@ -23,3 +23,4 @@ class ZuqiuPipeline(object):
         datetime, re_time = all['datetime'], all['re_time']
         match_id = all['gid']
         print(league, team_h, team_c, datetime, match_id, league_id)
+        return None

+ 45 - 26
hgg070_spider/spiders/zuqiu.py

@@ -12,7 +12,7 @@ class ZuqiuSpider(scrapy.Spider):
     allowed_domains = ['m.hgg070.com']
     custom_settings = {
         "ITEM_PIPELINES": {
-            "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
+            # "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
         },
         # 'LOG_LEVEL': 'DEBUG',
         # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
@@ -35,12 +35,12 @@ class ZuqiuSpider(scrapy.Spider):
     def start_requests(self):
         url = "http://m.hgg070.com/app/member/get_league_list.php"
         h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
-        for h_type in h_types:
-            # show_type, isp, length = h_type
-            show_type, isp, length = h_types[3]
+        # for h_type in h_types:
+        for i, h_type in enumerate(h_types):
+            show_type, isp, length = h_type
             self.headers['Content-Length'] = length
             from_data = {
-                'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
+                'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'gtype': 'FT',
@@ -50,24 +50,19 @@ class ZuqiuSpider(scrapy.Spider):
                 'isP': isp
             }
             yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,
-                                     meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
+                                     meta={'showtype': show_type, 'isp': isp, 'index': i}, dont_filter=True)
 
     def parse(self, response):
-        if response.status == 400:
-            print(response.status)
-            print('parse', response.url)
         leagues = response.xpath('//serverresponse/game/league')
         url = 'http://m.hgg070.com/app/member/get_game_list.php'
         if leagues:
             showtype = response.meta['showtype']
             isp = response.meta['isp']
-            if showtype == 'FT' and isp == '':
+            index = response.meta['index']
+            if index == 0:
                 date = ''
                 self.headers['Content-Length'] = '147'
-            elif showtype == 'FU' and isp == 'P':
-                date = 'all'
-                self.headers['Content-Length'] = '151'
-            elif showtype == 'FU' and isp == '':
+            elif index == 2:
                 date = 'all'
                 self.headers['Content-Length'] = '150'
             else:
@@ -76,7 +71,7 @@ class ZuqiuSpider(scrapy.Spider):
             for league in leagues:
                 lid = league.xpath('.//league_id/text()').extract_first()
                 from_data = {
-                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
+                    'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -87,35 +82,60 @@ class ZuqiuSpider(scrapy.Spider):
                     'isP': isp
                 }
                 yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
-                                         meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
+                                         meta={'showtype': showtype, 'isp': isp, 'index': index}, dont_filter=True)
         else:
             print('未获取到联赛id')
             return
 
     def parse_match(self, response):
+        # showtype = response.meta['showtype']
+        # isp = response.meta['isp']
+        index = response.meta['index']
         if response.status == 400:
             print(response.status)
-            print('parse_match', response.url)
+            # print(showtype, isp)
+            print('parse_odds', response.url)
+        pass
         url = 'http://m.hgg070.com/app/member/get_game_more.php'
-        showtype = response.meta['showtype']
-        isp = response.meta['isp']
-        if showtype == 'FT' and isp == '':
+        if index == 0:
             date = ''
+            showtype = 'FT'
+            isp = ''
             self.headers['Content-Length'] = '132'
-        elif showtype == 'FU' and isp == 'P':
+        elif index == 1:
             date = 'all'
+            showtype = 'FT'
+            isp = 'P'
             self.headers['Content-Length'] = '136'
-        elif showtype == 'FU' and isp == '':
+        elif index == 2:
             date = ''
+            showtype = 'FU'
+            isp = ''
             self.headers['Content-Length'] = '132'
         else:
             date = 'all'
+            showtype = 'FU'
+            isp = 'P'
             self.headers['Content-Length'] = '136'
+    #     showtype = response.meta['showtype']
+    #     isp = response.meta['isp']
+    #     if showtype == 'FT' and isp == '':
+    #         date = ''
+    #         self.headers['Content-Length'] = '132'
+    #     elif showtype == 'FU' and isp == 'P':
+    #         date = 'all'
+    #         self.headers['Content-Length'] = '136'
+    #     elif showtype == 'FU' and isp == '':
+    #         date = ''
+    #         self.headers['Content-Length'] = '132'
+    #     else:
+    #         date = 'all'
+    #         self.headers['Content-Length'] = '136'
         gids = response.xpath('//serverresponse/game/gid/text()').extract()
         if gids:
             for gid in gids:
                 from_data = {
-                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
+                    'uid': 'a76428ffc4d1ca306ea354d3ff5013bb8095c8d4101e7ce76db97f63ff061729',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -128,8 +148,6 @@ class ZuqiuSpider(scrapy.Spider):
                                          meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
 
     def parse_odds(self, response):
-        # print(response.text)
-        # game_lists = []
         if response.status == 400:
             print(response.status)
             print('parse_odds', response.url)
@@ -146,7 +164,8 @@ class ZuqiuSpider(scrapy.Spider):
                     else:
                         game_odds[i.tag] = i.text
             else:
-                logger.info('gopen == "N", 详细赔率盘口未开启')
+                pass
+                # logger.info('gopen == "N", 详细赔率盘口未开启')
             item = ZuqiuItem()
             item['all'] = game_odds
             yield item

+ 0 - 114
hgg070_spider/utils/helper.py

@@ -1,17 +1,3 @@
-import datetime
-import hashlib
-import json
-import platform
-# import time
-import requests
-from requests_futures.sessions import FuturesSession
-# from scrapy import Selector
-# from selenium import webdriver
-from hgg070_spider.conf.uid import UID as u_id, UID
-# from selenium.webdriver import FirefoxOptions
-
-
-fs_session = FuturesSession()
 import hashlib
 import json
 from requests_futures.sessions import FuturesSession
@@ -19,108 +5,8 @@ from .langconv import *
 from .LocalToken import token
 
 
-
 class Helper(object):
     @staticmethod
-    def changetime(params):
-        if params.endswith('p'):
-            p_time = params[-6:-1]
-            if p_time.startswith('12'):
-                us_time = params[:-1] + ":00"
-                # print(us_time)
-                # start_time = datetime.datetime.strptime(us_time, "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
-                #     hours=12)
-                # match_date = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[0]
-                # match_time = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[1]
-            else:
-                params = params[:-1] + ':00'
-                us_time = datetime.datetime.strptime(params, "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
-                    hours=12)
-                # start_time = us_time + datetime.timedelta(hours=12)
-                # match_date = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[0]
-                # match_time = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[1]
-                us_time = us_time.strftime("%Y-%m-%d %H:%M:%S")
-            # pass
-        else:
-            us_time = params[:-1] + ':00'
-            # start_time = datetime.datetime.strptime(params, "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
-            #     hours=12)
-            # match_date = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[0]
-            # match_time = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[1]
-        return us_time
-
-    @staticmethod
-    def get(url, params):
-        return requests.get(url, data={"data": json.dumps(params), "token": u_id['token']}, timeout=30)
-
-    @staticmethod
-    def post(url, params):
-        return requests.post(url, data={"data": json.dumps(params), "token": u_id['token']}, timeout=30)
-
-    @staticmethod
-    def async_post(url, params):
-        try:
-            # print(u_id['token'])
-            data = fs_session.post(url, data={"data": json.dumps(params), "token": u_id['token']}).result()
-            # data = fs_session.post(url, data={"data": json.dumps(params), "token": "agxrI115617094865d15cbaecca9f"}, timeout=30).result()
-            if data:
-                response_data = json.loads(data.content.decode('utf-8'))
-                # print(response_data)
-                # token异常重新获取
-                if response_data.get('status') == 10032:
-                    token = Helper.get_token()
-                    sys = platform.system()
-                    if sys == 'Windows':
-                        file_path = os.path.abspath(os.path.join(os.getcwd(), "..")) + "\\conf\\uid.py"
-                    else:
-                        file_path = os.path.abspath(os.path.join(os.getcwd(), "..")) + "/hgg070_spider/conf/uid.py"
-                    if os.path.exists(file_path):
-                        fs = open(file_path, 'w+')
-                        UID['token'] = token
-                        fs.write('UID={}'.format(UID))
-                        fs.close()
-                    data = fs_session.post(url, data={"data": json.dumps(params), "token": u_id['token']}).result()
-                    # data = fs_session.post(url, data={"data": json.dumps(params), "token": u_id['token']})
-                return data.content.decode('utf-8')
-                # return data
-        except requests.exceptions.RequestException as e:
-            print(e)
-
-    @staticmethod
-    def get_zip_data(list1, list2):
-        if list1 and list2:
-            if isinstance(list1, list) and isinstance(list2, list):
-                return dict(zip(list1, list2))
-
-    @staticmethod
-    def get_token():
-        cpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        with open(cpath + '/conf/settings.json', 'r', encoding='utf8') as f:
-            data = json.load(f)
-        token_url = data['token_url']
-        username1 = data['username1']
-        password1 = data['password1']
-        r = requests.post(token_url, data={'account': username1, 'password': password1})
-        token = r.json()['data']['token']
-        return token
-
-
-
-
-    @staticmethod
-    def genearte_MD5(params):
-        # 创建md5对象
-        hl = hashlib.md5()
-        hl.update(params.encode(encoding='utf-8'))
-        # print('MD5加密前为 :' + params)
-        # print('MD5加密后为 :' + hl.hexdigest())
-        return hl.hexdigest()
-
-
-if __name__ == '__main__':
-    import os
-    print(os.path.abspath(os.path.join(os.getcwd(), "..")) + "\\conf\\uid.py")
-    print(os.path.abspath(__file__))
     def async_post(url, params):
         fs_session = FuturesSession()
         t_url, t_user, t_password, t_token = token['token_url'], token['username'], token['password'], token['token']