1
0

2 Ревизии 10c979a4fc ... a5eba01443

Автор SHA1 Съобщение Дата
  juan a5eba01443 update преди 6 години
  juan e33f264672 update преди 6 години

BIN
hgg070_spider/__pycache__/__init__.cpython-37.pyc


BIN
hgg070_spider/__pycache__/items.cpython-37.pyc


BIN
hgg070_spider/__pycache__/settings.cpython-37.pyc


+ 1 - 0
hgg070_spider/conf/uid.py

@@ -0,0 +1 @@
+UID={'uid': 'b0cifnj5m21863559l126784', 'ip': 'http://199.26.100.57/', 'cookie': '_ga=GA1.4.487451235.1562397743; OddType@21863548=H; protocolstr=http; gamePoint_21863548=2019-08-18%2A2%2A0; _gid=GA1.4.637049958.1566179135; _gat_UA-75448111-1=1', 'token': 'hyOO7G15615993365d141d6810ab7'}

+ 41 - 4
hgg070_spider/items.py

@@ -7,8 +7,45 @@
 
 import scrapy
 
+class ZuqiuItem(scrapy.Field):
+    id=scrapy.Field()
+    league=scrapy.Field()
+    team_h=scrapy.Field()
+    team_c = scrapy.Field()
+    #让球
+    ior_RTS2Y=scrapy.Field()
+    ior_RTS2N = scrapy.Field()
+    #
+    ior_REH = scrapy.Field()
+    ior_REC = scrapy.Field()
 
-class Hgg070SpiderItem(scrapy.Item):
-    # define the fields for your item here like:
-    # name = scrapy.Field()
-    pass
+    ior_ROUC = scrapy.Field()
+    ior_HROUH = scrapy.Field()
+    ratio_rouo = scrapy.Field()
+    ratio_rouu = scrapy.Field()
+
+    # ior_RMH = scrapy.Field()
+    # ior_RMC = scrapy.Field()
+    # ior_RMN = scrapy.Field()
+    #
+    # ior_RH2C1 = scrapy.Field()
+    # ior_RH3C2 = scrapy.Field()
+    # ior_CRGN = scrapy.Field()
+    # ior_RH4C4 = scrapy.Field()
+    #
+    # ior_RH1C0 = scrapy.Field()
+    # ior_RH2C0 = scrapy.Field()
+    # ior_RH2C1 = scrapy.Field()
+    # ior_RH3C0 = scrapy.Field()
+    # ior_RH3C1 = scrapy.Field()
+    # ior_RH3C2 = scrapy.Field()
+    # ior_RH4C0 = scrapy.Field()
+    # ior_RH4C1 = scrapy.Field()
+    #
+    # ior_RH4C2 = scrapy.Field()
+    # ior_RH4C3 = scrapy.Field()
+    # ior_RT01 = scrapy.Field()
+    # ior_RT23 = scrapy.Field()
+    # ior_RT46 = scrapy.Field()
+    # ratio_rouho = scrapy.Field()
+    # ratio_rouhu = scrapy.Field()

+ 21 - 0
hgg070_spider/pipelines/zuqiu.py

@@ -1,3 +1,23 @@
+<<<<<<< HEAD
+from twisted.internet import defer,reactor
+from ..utils.helper import Helper
+
+class ZuqiuPipeline(object):
+    def open_spider(self, spider):
+        pass
+    @defer.inlineCallbacks
+    def process_item(self,item,spider):
+        out=defer.Deferred()
+        reactor.callInThread(self._do_calculation,item,out)
+        yield out
+
+    def _do_calculation(self,item,out):
+        param={"id":item["id"],"league":item["league"],"team_h":item["team_h"],
+               "team_c":item["team_c"],"ior_RTS2Y":item["ior_RTS2Y"],"ior_RTS2N":item["ior_RTS2N"],
+               "ior_REH":item["ior_REH"],"ior_REC":item["ior_REC"],"ior_ROUC":item["ior_ROUC"],
+               "ior_HROUH":item["ior_HROUH"],"ratio_rouo":item["ratio_rouo"],"ratio_rouu":item["ratio_rouu"]}
+        data=Helper.async_post("")
+=======
 import datetime
 import time
 import logging
@@ -656,3 +676,4 @@ class Zuqiupipeline(object):
         # #     else:
         # #         logger.info('足球详细赔率列表为空')
         # reactor.callFromThread(out.callback, item)
+>>>>>>> 10c979a4fcc4f2d36f17fa2ecd6de7bad38358f3

+ 12 - 1
hgg070_spider/settings.py

@@ -88,6 +88,16 @@ ROBOTSTXT_OBEY = False
 #HTTPCACHE_DIR = 'httpcache'
 #HTTPCACHE_IGNORE_HTTP_CODES = []
 #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+<<<<<<< HEAD
+
+R_HOST = '103.108.41.110'  # NOTE(review): R_HOST/R_POST/R_DB/R_PASSWORD are assigned again on the other side of this merge conflict further down
+R_POST = 10432  # NOTE(review): presumably the port for R_HOST ("POST" looks like a typo for "PORT") - confirm
+R_DB = "kaiyou"
+R_USER = "kaiyou"
+R_PASSWORD = 123456  # NOTE(review): credential committed in source and stored as an int, not a string - confirm consumers expect that
+
+
+=======
 M_HOST = '192.168.2.200'
 # M_HOST = '127.0.0.1'
 M_POST = 27017
@@ -113,4 +123,5 @@ MATCHWARN = "http://stadmin.bocai108.com:19093/setMatchWarn"
 R_HOST = '192.168.2.200'
 R_POST = 6379
 R_DB = 1
-R_PASSWORD = 123456
+R_PASSWORD = 123456
+>>>>>>> 10c979a4fcc4f2d36f17fa2ecd6de7bad38358f3

BIN
hgg070_spider/spiders/__pycache__/__init__.cpython-37.pyc


BIN
hgg070_spider/spiders/__pycache__/zuqiu.cpython-37.pyc


+ 11 - 0
hgg070_spider/spiders/lanqiu.py

@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+import scrapy
+
+
+class LanqiuSpider(scrapy.Spider):
+    name = 'lanqiu'
+    allowed_domains = ['m.hgg070.com/']
+    start_urls = ['http://m.hgg070.com//']
+
+    def parse(self, response):
+        pass

+ 79 - 0
hgg070_spider/spiders/zuqiu.py

@@ -1,5 +1,83 @@
 # -*- coding: utf-8 -*-
 import scrapy
+<<<<<<< HEAD
+from ..items import ZuqiuItem
+
+class ZuqiuSpider(scrapy.Spider):
+    name = 'zuqiu'
+    allowed_domains = ['m.hgg070.com/']
+    headers = {
+        'Accept': '*/*',
+        'Accept-Encoding': 'gzip, deflate',
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'Content-Length': '130',
+        'Content-type': 'application/x-www-form-urlencoded',
+        'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
+        'Host': 'm.hgg070.com',
+        'Origin': 'http://m.hgg070.com',
+        'Proxy-Connection': 'keep-alive',
+        'Referer': 'http://m.hgg070.com/',
+        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
+    }
+    custom_settings={
+        "ITEM_PIPELINES": {
+            "collectSports.pipelines.zuqiu.ZuqiuPipeline": 200,
+        },
+    }
+    #读取今日足球
+    def start_requests(self):
+        url="http://m.hgg070.com/app/member/get_league_list.php"
+        from_data={
+            'uid': '7c70e73f576d42d9f6d9fb1fcaa08c47b04bb9279584caedfe65858afb26722d',
+            'langx': 'zh-cn',
+            'ltype': '3',
+            'gtype': 'FT',
+            'showtype': 'RB',
+            'sorttype': '',
+            'date': '',
+            'isP': ''
+        }
+        yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers,meta={'data': from_data}, dont_filter=True)
+
+
+    #解析今日足球,获取所有的联赛,并请求联赛详情页
+    def parse(self, response):
+        url='http://m.hgg070.com/app/member/get_game_list.php'
+        data=response.xpath("//league")
+        from_data=response.meta['data']
+        for item in data:
+            lid = item.xpath('./league_id/text()').extract_first()
+            from_data['lid'] = lid
+            yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.detailtedMsg,meta={'data': response.meta['data']},dont_filter=True)
+
+    # 获取所有玩法
+    def detailtedMsg(self,response):
+        url = 'http://m.hgg070.com/app/member/get_game_more.php'
+        data=response.xpath("//game")
+        from_data=response.meta['data']
+        for item in data:
+            lid = item.xpath('./gid/text()').extract_first()
+            from_data['gid'] = lid
+            yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.getitem,dont_filter=True)
+
+    #获取更多
+    def getitem(self,response):
+        data=response.xpath("//game")
+        for obj in data:
+            item=ZuqiuItem()
+            item.id=obj.xpath('./gid')
+            item.team_h=obj.xpath('./team_h')
+            item.team_c = obj.xpath('./team_c')
+            item.ior_RTS2Y = obj.xpath('./ior_RTS2Y')
+            item.ior_RTS2N = obj.xpath('./ior_RTS2N')
+            item.ior_REH = obj.xpath('./ior_REH')
+            item.ior_REC = obj.xpath('./ior_REC')
+            item.ior_ROUC = obj.xpath('./ior_ROUC')
+            item.ior_HROUH = obj.xpath('./ior_HROUH')
+            item.ratio_rouo = obj.xpath('./ratio_rouo')
+            item.ratio_rouu = obj.xpath('./ratio_rouu')
+            yield item
+=======
 
 
 class ZuqiuSpider(scrapy.Spider):
@@ -94,3 +172,4 @@ class ZuqiuSpider(scrapy.Spider):
         print(response.text)
         pass
 
+>>>>>>> 10c979a4fcc4f2d36f17fa2ecd6de7bad38358f3

+ 119 - 0
hgg070_spider/utils/helper.py

@@ -1,12 +1,130 @@
+<<<<<<< HEAD
+import datetime
+import hashlib
+import json
+import platform
+# import time
+import requests
+from requests_futures.sessions import FuturesSession
+# from scrapy import Selector
+# from selenium import webdriver
+from hgg070_spider.conf.uid import UID as u_id, UID
+# from selenium.webdriver import FirefoxOptions
+
+
+fs_session = FuturesSession()
+=======
 import hashlib
 import json
 from requests_futures.sessions import FuturesSession
 from .langconv import *
 from .LocalToken import token
+>>>>>>> 10c979a4fcc4f2d36f17fa2ecd6de7bad38358f3
 
 
 class Helper(object):
     @staticmethod
+<<<<<<< HEAD
+    def changetime(params):
+        if params.endswith('p'):
+            p_time = params[-6:-1]
+            if p_time.startswith('12'):
+                us_time = params[:-1] + ":00"
+                # print(us_time)
+                # start_time = datetime.datetime.strptime(us_time, "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
+                #     hours=12)
+                # match_date = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[0]
+                # match_time = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[1]
+            else:
+                params = params[:-1] + ':00'
+                us_time = datetime.datetime.strptime(params, "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
+                    hours=12)
+                # start_time = us_time + datetime.timedelta(hours=12)
+                # match_date = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[0]
+                # match_time = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[1]
+                us_time = us_time.strftime("%Y-%m-%d %H:%M:%S")
+            # pass
+        else:
+            us_time = params[:-1] + ':00'
+            # start_time = datetime.datetime.strptime(params, "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
+            #     hours=12)
+            # match_date = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[0]
+            # match_time = start_time.strftime("%Y-%m-%d %H:%M:%S").split(' ')[1]
+        return us_time
+
+    @staticmethod
+    def get(url, params):
+        return requests.get(url, data={"data": json.dumps(params), "token": u_id['token']}, timeout=30)
+
+    @staticmethod
+    def post(url, params):
+        return requests.post(url, data={"data": json.dumps(params), "token": u_id['token']}, timeout=30)
+
+    @staticmethod
+    def async_post(url, params):
+        try:
+            # print(u_id['token'])
+            data = fs_session.post(url, data={"data": json.dumps(params), "token": u_id['token']}).result()
+            # data = fs_session.post(url, data={"data": json.dumps(params), "token": "agxrI115617094865d15cbaecca9f"}, timeout=30).result()
+            if data:
+                response_data = json.loads(data.content.decode('utf-8'))
+                # print(response_data)
+                # token异常重新获取
+                if response_data.get('status') == 10032:
+                    token = Helper.get_token()
+                    sys = platform.system()
+                    if sys == 'Windows':
+                        file_path = os.path.abspath(os.path.join(os.getcwd(), "..")) + "\\conf\\uid.py"
+                    else:
+                        file_path = os.path.abspath(os.path.join(os.getcwd(), "..")) + "/hgg070_spider/conf/uid.py"
+                    if os.path.exists(file_path):
+                        fs = open(file_path, 'w+')
+                        UID['token'] = token
+                        fs.write('UID={}'.format(UID))
+                        fs.close()
+                    data = fs_session.post(url, data={"data": json.dumps(params), "token": u_id['token']}).result()
+                    # data = fs_session.post(url, data={"data": json.dumps(params), "token": u_id['token']})
+                return data.content.decode('utf-8')
+                # return data
+        except requests.exceptions.RequestException as e:
+            print(e)
+
+    @staticmethod
+    def get_zip_data(list1, list2):
+        if list1 and list2:
+            if isinstance(list1, list) and isinstance(list2, list):
+                return dict(zip(list1, list2))
+
+    @staticmethod
+    def get_token():
+        cpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        with open(cpath + '/conf/settings.json', 'r', encoding='utf8') as f:
+            data = json.load(f)
+        token_url = data['token_url']
+        username1 = data['username1']
+        password1 = data['password1']
+        r = requests.post(token_url, data={'account': username1, 'password': password1})
+        token = r.json()['data']['token']
+        return token
+
+
+
+
+    @staticmethod
+    def genearte_MD5(params):
+        # 创建md5对象
+        hl = hashlib.md5()
+        hl.update(params.encode(encoding='utf-8'))
+        # print('MD5加密前为 :' + params)
+        # print('MD5加密后为 :' + hl.hexdigest())
+        return hl.hexdigest()
+
+
+if __name__ == '__main__':
+    import os  # only imported when this file is run as a script
+    print(os.path.abspath(os.path.join(os.getcwd(), "..")) + "\\conf\\uid.py")  # prints a uid.py path built from the parent of the working directory
+    print(os.path.abspath(__file__))  # prints the absolute path of this file
+=======
     def async_post(url, params):
         fs_session = FuturesSession()
         t_url, t_user, t_password, t_token = token['token_url'], token['username'], token['password'], token['token']
@@ -46,3 +164,4 @@ class Helper(object):
         hl = hashlib.md5()
         hl.update(line.encode(encoding='utf-8'))
         return hl.hexdigest()
+>>>>>>> 10c979a4fcc4f2d36f17fa2ecd6de7bad38358f3

+ 7 - 0
main.py

@@ -0,0 +1,7 @@
+import sys
+import os
+from scrapy.cmdline import execute
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))  # add this file's directory to the import path
+# execute(["scrapy", "crawl", "bangqiu"])
+execute(["scrapy", "crawl", "zuqiu"])  # hands the argv list for "scrapy crawl zuqiu" to Scrapy's command-line entry point