Your Name 6 年之前
父節點
當前提交
3799e831fd

+ 6 - 0
.idea/libraries/R_User_Library.xml

@@ -0,0 +1,6 @@
+<component name="libraryTable">
+  <library name="R User Library">
+    <CLASSES />
+    <SOURCES />
+  </library>
+</component>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

+ 49 - 42
collectSports/items.py

@@ -13,55 +13,62 @@ class CollectsportsItem(scrapy.Item):
     zq_league = scrapy.Field()
     zq_match = scrapy.Field()
     zq_zhibo = scrapy.Field()
-    zq_chain=scrapy.Field()
-    zq_today=scrapy.Field()
-    csource=scrapy.Field()
+    zq_chain = scrapy.Field()
+    zq_today = scrapy.Field()
+    csource = scrapy.Field()
 
-    zaopan=scrapy.Field()
 
 class ZqToday(scrapy.Item):
-    match_id=scrapy.Field()
-    is_roll=scrapy.Field()
-    is_corner=scrapy.Field()
+    match_id = scrapy.Field()
+    is_roll = scrapy.Field()
+    is_corner = scrapy.Field()
+
 
 class ZqZhibo(scrapy.Item):
-    game_type=scrapy.Field() #类型FT 足球,BK 篮球,vb 排球
-    start_time=scrapy.Field() #开始时间
-    host_team=scrapy.Field() #主队
-    guest_team=scrapy.Field()#客队
-    doing=scrapy.Field()#是否进行中
-    showid=scrapy.Field()#显示id
-    shower=scrapy.Field()#显示类型
-    league_name=scrapy.Field()
+    game_type = scrapy.Field()  # 类型FT 足球,BK 篮球,vb 排球
+    start_time = scrapy.Field()  # 开始时间
+    host_team = scrapy.Field()  # 主队
+    guest_team = scrapy.Field()  # 客队
+    doing = scrapy.Field()  # 是否进行中
+    showid = scrapy.Field()  # 显示id
+    shower = scrapy.Field()  # 显示类型
+    league_name = scrapy.Field()
+
 
 class ZqLeagueItem(scrapy.Item):
-    league_id=scrapy.Field()
-    league_name=scrapy.Field() #联赛明称
-    league_subname=scrapy.Field() #子联赛明称
-    start_time=scrapy.Field() #开始时间
-    team_num=scrapy.Field() #球队数量
-    game_type=scrapy.Field() #联赛类型,FT ,NB
-    
+    content = scrapy.Field()
+    league_name = scrapy.Field()  # 联赛名称
+    league_subname = scrapy.Field()  # 子联赛名称
+    start_time = scrapy.Field()  # 开始时间
+    team_num = scrapy.Field()  # 球队数量
+    game_type = scrapy.Field()  # 联赛类型,FT ,NB
+    uuid = scrapy.Field()
+    # source = scrapy.Field()
+
+
 class ZqMatch(scrapy.Item):
-    match_id=scrapy.Field()
-    match_name=scrapy.Field()
-    host_id=scrapy.Field()
-    guest_id=scrapy.Field()
-    is_half=scrapy.Field() #是否上半场
-    half_match_id=scrapy.Field()
-    odds_num=scrapy.Field() #总玩法数量
-    host_name=scrapy.Field()
-    is_roll=scrapy.Field() #滚球
-    mdate=scrapy.Field() #日期
-    mtime=scrapy.Field()#时间
-    is_corner=scrapy.Field()#是否角球
-    guest_name=scrapy.Field()
-    league_id=scrapy.Field()
+    match_id = scrapy.Field()
+    match_name = scrapy.Field()
+    host_id = scrapy.Field()
+    guest_id = scrapy.Field()
+    is_half = scrapy.Field()  # 是否上半场
+    half_match_id = scrapy.Field()
+    odds_num = scrapy.Field()  # 总玩法数量
+    host_name = scrapy.Field()
+    is_roll = scrapy.Field()  # 滚球
+    mdate = scrapy.Field()  # 日期
+    mtime = scrapy.Field()  # 时间
+    is_corner = scrapy.Field()  # 是否角球
+    guest_name = scrapy.Field()
+    league_id = scrapy.Field()
+    uuid = scrapy.Field()
+    source = scrapy.Field()
+
 
 class ZqChain(scrapy.Item):
-    league_id=scrapy.Field()
-    code=scrapy.Field() #赔率代码
-    status=scrapy.Field() # 未知
-    name=scrapy.Field() #赔率名称
-    odds=scrapy.Field() #赔率
-    enabled=scrapy.Field() #启用状态
+    league_id = scrapy.Field()
+    code = scrapy.Field()  # 赔率代码
+    status = scrapy.Field()  # 未知
+    name = scrapy.Field()  # 赔率名称
+    odds = scrapy.Field()  # 赔率
+    enabled = scrapy.Field()  # 启用状态

+ 23 - 0
collectSports/main.py

@@ -0,0 +1,23 @@
+import sys
+import os
+from scrapy.cmdline import execute
+
+# print(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+execute(["scrapy", "crawl", "sportslst"])
+# execute(["scrapy", "crawl", "zuqiu"])
+# execute(["scrapy", "crawl", "lanqiu"])
+# execute(["scrapy", "crawl", "guanjun"])
+# execute(["scrapy", "crawl", "wangqiu"])
+# execute(["scrapy", "crawl", "wqbodan"])
+# execute(["scrapy", "crawl", "bangqiu"])
+# execute(["scrapy", "crawl", "roll_zuqiu"]) #滚球足球 回来要解开这个注释 其他全部解封
+# execute(["scrapy", "crawl", "roll_lanqiu"]) #滚球篮球
+# execute(["scrapy", "crawl", "roll_wangqiu"]) #滚球网球
+# execute(["scrapy", "crawl", "roll_bangqiu"]) #滚球棒球
+# execute(["scrapy", "crawl", "ball_status"]) #滚球id本地存
+# execute(["scrapy", "crawl", "ball_status_update"]) #滚球id结束时间更新状态
+# execute(["scrapy", "crawl", "zq_jieshu"]) #滚球id结束时间更新状态
+# execute(["scrapy", "crawl", "lq_jieshu"]) #滚球id结束时间更新状态
+# execute(["scrapy", "crawl", "wq_jieshu"]) #滚球id结束时间更新状态
+# execute(["scrapy", "crawl", "bq_jieshu"]) #滚球id结束时间更新状态

+ 90 - 96
collectSports/mcollect/hg0088/Storage.py

@@ -1,108 +1,102 @@
-from collectSports.items import * 
-from biz.zqchain import zqChain
-import copy
+from collectSports.items import *
+import uuid
+
+
 class Storage(object):
     def __init__(self):
         pass
-    def zaopan(self,re):
-        data=CollectsportsItem()
-        data['csource']='zaopan'
-        data['zq_league']=[]
-        data['zq_match']=[]
-        
-        for index in re['data']:            
-            lg=ZqLeagueItem()
-            lg['league_id']=index['league_id']
-            lg['league_name']=index['league_name']
-            
-            mc=ZqMatch()
-            
-            mc['match_id']=index['match_id']
-            mc['half_match_id']=index['half_match_id']
-            mc['host_id']=index['host_id']
-            mc['guest_id']=index['guest_id']
-            mc['host_name']=index['host_name']
-            mc['guest_name']=index['guest_name']
-            mc['is_half']=0
-            mc['league_id']=index['league_id']
-            mc['is_roll']=index['is_roll'] #是否滚球
-            mc['mdate']=index['mdate']
-            mc['mtime']=index['mtime']
-            mc['is_corner']=index['is_corner']
-            mc_half=ZqMatch()
-            mc_half['match_id']=index['half_match_id']
-            mc_half['half_match_id']=0
-            mc_half['host_id']=index['host_id']
-            mc_half['guest_id']=index['guest_id']
-            mc_half['host_name']=index['host_name']
-            mc_half['guest_name']=index['guest_name']
-            mc_half['is_half']=1
-            mc_half['league_id']=index['league_id']
-            mc_half['is_roll']=index['is_roll'] #是否滚球
-            mc_half['mdate']=index['mdate']
-            mc_half['mtime']=index['mtime']
-            mc_half['is_corner']=index['is_corner']
-       
-            data['zq_league'].append(lg)
+
+    def zaopan(self, re):
+        data = CollectsportsItem()
+        data['csource'] = 'zaopan'
+        data['zq_league'] = data['zq_match'] = []
+
+        for index in re['data']:
+            i_uuid = uuid.uuid4()
+            # print(i_uuid)
+            lg = ZqLeagueItem()
+            # lg['league_id'] = index['league_id']
+            lg['league_name'] = index['league_name']
+            lg['uuid'] = i_uuid
+            # lg['source'] = 'hg0088'
+            lg['content']={'league_id':index['league_id'],'source':'hg0088'}
+            mc = ZqMatch()
+            mc['match_id'] = index['match_id']
+            mc['half_match_id'] = index['half_match_id']
+            mc['host_id'] = index['host_id']
+            mc['guest_id'] = index['guest_id']
+            mc['host_name'] = index['host_name']
+            mc['guest_name'] = index['guest_name']
+            mc['is_half'] = 0
+            mc['league_id'] = index['league_id']
+            mc['is_roll'] = index['is_roll']  # 是否滚球
+            mc['mdate'] = index['mdate']
+            mc['mtime'] = index['mtime']
+            mc['is_corner'] = index['is_corner']
+            mc['uuid'] = i_uuid
+            mc['source'] = 'hg0088'
             data['zq_match'].append(mc)
-            data['zq_match'].append(mc_half)
-        
+            mc['is_half'] = 1
+            mc['match_id'] = index['half_match_id']
+            mc['half_match_id'] = 0
+            data['zq_match'].append(mc)
+            data['zq_league'].append(lg)
         return data
-    def zhibo(self,re):
-        data=CollectsportsItem()
-        data['csource']='zhibo'
-        data['zq_zhibo']=[]
+
+    def zhibo(self, re):
+        data = CollectsportsItem()
+        data['csource'] = 'zhibo'
+        data['zq_zhibo'] = []
         for index in re:
-            lg=ZqZhibo()
-            lg['game_type']=index['game_type']
-            lg['start_time']=index['start_time']
-            lg['host_team']=index['host_team']   
-            lg['guest_team']=index['guest_team']
-            lg['doing']=index['doing']
-            lg['showid']=index['showid']
-            lg['shower']=index['shower']
-            lg['league_name']=index['league_name']
+            lg = ZqZhibo()
+            lg['game_type'] = index['game_type']
+            lg['start_time'] = index['start_time']
+            lg['host_team'] = index['host_team']
+            lg['guest_team'] = index['guest_team']
+            lg['doing'] = index['doing']
+            lg['showid'] = index['showid']
+            lg['shower'] = index['shower']
+            lg['league_name'] = index['league_name']
             data['zq_zhibo'].append(lg)
-            
+
         return data
 
-    def chain(self,re):
-            data=CollectsportsItem()
-            data['csource']='chain'
-            data['zq_league']=[]
-            data['zq_match']=[]
-            
-            for index in re:                
-                lg=ZqLeagueItem()
-                lg['start_time']=index['start_time']
-                lg['league_id']=index['league_id']
-                lg['league_name']=index['league_name']
-                lg['league_subname']=index['league_subname']
-                lg['team_num']=index['dcount']
-                lg['game_type']=index['game_type']
-                
-                for item in index['items']:
-                    mc=ZqMatch()
-                    mc['league_id']=index['league_id']
-                    mc['status']=item['status']
-                    mc['code']=item['code']
-                    mc['name']=item['name']
-                    mc['enabled']=item['enabled']
-                    mc['odds']=item['odds']
-                    data['zq_match'].append(mc)
-                data['zq_league'].append(lg)
-           
-            return data
-    def today(self,re):
-        data=CollectsportsItem()
-        data['csource']='today'
-        data['zq_today']=[]
-    
+    def chain(self, re):
+        data = CollectsportsItem()
+        data['csource'] = 'chain'
+        data['zq_league'] = data['zq_chain'] = []
+        for index in re:
+
+            lg = ZqLeagueItem()
+            lg['start_time'] = index['start_time']
+            lg['league_id'] = index['league_id']
+            lg['league_name'] = index['league_name']
+            lg['league_subname'] = index['league_subname']
+            lg['team_num'] = index['dcount']
+            lg['game_type'] = index['game_type']
+
+            for item in index['items']:
+                mc = ZqChain()
+                mc['league_id'] = index['league_id']
+                mc['status'] = item['status']
+                mc['code'] = item['code']
+                mc['name'] = item['name']
+                mc['enabled'] = item['enabled']
+                mc['odds'] = item['odds']
+                data['zq_chain'].append(mc)
+            data['zq_league'].append(lg)
+
+        return data
+
+    def today(self, re):
+        data = CollectsportsItem()
+        data['csource'] = 'today'
+        data['zq_today'] = []
+
         for index in re:
-            lg=ZqToday()
-            lg['match_id']=index['match_id']
-            lg['is_roll']=index['is_roll']
-            lg['is_corner']=index['is_corner']
+            lg = ZqToday()
+            lg['match_id'] = index['match_id']
+            lg['is_roll'] = index['is_roll']
+            lg['is_corner'] = index['is_corner']
             data['zq_today'].append(lg)
-            
         return data

+ 32 - 5
collectSports/pipelines/sportslst.py

@@ -4,12 +4,39 @@
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+import json
+import pymongo
+
+import requests
+
+from pycomm.mongo import DBMongo
 
 
 class SportslstPipeline(object):
+    def open_spider(self, spider):
+        self.host = '192.168.2.200'
+        self.port = 27017
+        self.dbname = 'kaiyou'  # 数据库名
+        self.client = pymongo.MongoClient(host=self.host, port=self.port)
+        self.tdb = self.client[self.dbname]
+        self.port = self.tdb['zq_league']  # 表名
+    #     competition
+        self.port2 = self.tdb['zq_competition']  # 表名
+
     def process_item(self, item, spider):
-        # pass
-        # if 'zq_league' in item:
-            # print(item)
-        print(item['zq_match'])
-        # return item
+        zaopan = item['csource']
+        if zaopan == 'zaopan':
+            zq_leagues = set(item['zq_league'])
+            for zq_league in zq_leagues:
+                try:
+                    league_name = zq_league['league_name']
+                except:
+                    league_name = None
+                if league_name:
+                    self.port.insert(dict(zq_league))
+                if zq_league.get('host_name', None):
+                    self.port2.insert(dict(zq_league))
+            return item
+
+    def close_spider(self, spider):
+        self.client.close()

+ 46 - 52
collectSports/spiders/sportslst.py

@@ -1,88 +1,82 @@
 # -*- coding: utf-8 -*-
-import scrapy,lxml.etree,pycomm,json
+import scrapy, lxml.etree, pycomm, json
 from collectSports.items import *
 # from mcollect.hg0088 import Resolver
 from biz.zqleague import zqLeague
 
 
 class SportslstSpider(scrapy.Spider):
-    curSrc=None
+    curSrc = None
     name = 'sportslst'
     allowed_domains = ['hg0088.com']
     # start_urls = ['http://hg0088.com/']
     custom_settings = {
-            "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
-            "Accept-Encoding":"gzip, deflate",
-            "Accept-Language":"zh-CN,zh;q=0.8",
-            "Cache-Control":"max-age=0",
-            "Connection":"keep-alive",
-            "Cookie":"OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
-            "Host":"199.26.100.178",
-            "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
-            "ITEM_PIPELINES": {
-                "collectSports.pipelines.sportslst.SportslstPipeline": 200,
-            }
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate",
+        "Accept-Language": "zh-CN,zh;q=0.8",
+        "Cache-Control": "max-age=0",
+        "Connection": "keep-alive",
+        "Cookie": "OddType@21627573=H; protocolstr=http; gamePoint_21627573=2019-05-10%2A0%2A0; _ga=GA1.4.601418716.1557495256; _gid=GA1.4.1118061739.1557495256",
+        "Host": "199.26.100.178",
+        "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36",
+        "ITEM_PIPELINES": {
+            "collectSports.pipelines.sportslst.SportslstPipeline": 200,
         }
+    }
 
     # start_url= 'http://199.26.100.178/app/member/get_game_allbets.php'
 
     def start_requests(self):
-        self.curSrc=source=self.getCurrentSource()
-        mc=__import__('mcollect.'+source)
-        srcObj=getattr(mc,source)
+        self.curSrc = source = self.getCurrentSource()
+        mc = __import__('mcollect.' + source)
+        srcObj = getattr(mc, source)
         # zl=zqLeague()
         # zl.update({'league_id':1})
         for item in srcObj.links:
-            url=item['url'].format(uid=srcObj.uid,page=1)
+            url = item['url'].format(uid=srcObj.uid, page=1)
             if not item['cb']:
-                params='default'
+                params = 'default'
             else:
-                params=str(item['cb'])
-            request = scrapy.FormRequest(url,  callback=self.parse,meta={'cb':params,'subdel':0})
+                params = str(item['cb'])
+            request = scrapy.FormRequest(url, callback=self.parse, meta={'cb': params, 'subdel': 0})
             yield request
-        
-
 
     def getCurrentSource(self):
-        conf=pycomm.getCache('conf')
+        conf = pycomm.getCache('conf')
         if 'currentSource' in conf:
             return conf['currentSource']
-        return 
+        return
 
     def parse(self, response):
-        cb=response.meta['cb']
-        subdel=response.meta['subdel']
-        mc=__import__('mcollect.'+self.curSrc+'.Resolver',fromlist=True)
-        res=mc.Resolver()
-        cbk=getattr(res,cb)
-        re=cbk(response.body)
-        re=json.loads(re)
+        cb = response.meta['cb']
+        subdel = response.meta['subdel']
+        mc = __import__('mcollect.' + self.curSrc + '.Resolver', fromlist=True)
+        res = mc.Resolver()
+        cbk = getattr(res, cb)
+        re = cbk(response.body)
+        re = json.loads(re)
         print(subdel)
-        if subdel==0:
+        if subdel == 0:
             if 'total_page' in re:
-                self.subStart_request(re['total_page'],response.url,response.meta['cb'],'page_no')
+                self.subStart_request(re['total_page'], response.url, response.meta['cb'], 'page_no')
                 print(6666)
-        mcs=__import__('mcollect.'+self.curSrc+'.Storage',fromlist=True)
-        ress=mcs.Storage()
-        cbks=getattr(ress,cb)
-        result=cbks(re)
-      
+        mcs = __import__('mcollect.' + self.curSrc + '.Storage', fromlist=True)
+        ress = mcs.Storage()
+        cbks = getattr(ress, cb)
+        result = cbks(re)
+
         yield result
-        
-    def subStart_request(self,total_page,url,cb,page_name='page'):
-        print(1111)
-        while total_page>1:
-            newurl=url.replace(page_name+'=0',page_name+'='+total_page)
-            newurl=newurl.replace(page_name+'=1',page_name+'='+total_page)
+
+    def subStart_request(self, total_page, url, cb, page_name='page'):
+        while total_page > 1:
+            print(1111)
+            newurl = url.replace(page_name + '=0', page_name + '=' + total_page)
+            newurl = newurl.replace(page_name + '=1', page_name + '=' + total_page)
             print(newurl)
             if not cb:
-                params='default'
+                params = 'default'
             else:
-                params=str(cb)
-            request = scrapy.FormRequest(newurl,  callback=self.parse,meta={'cb':params,'subdel':1})            
-            total_page=total_page-1
+                params = str(cb)
+            request = scrapy.FormRequest(newurl, callback=self.parse, meta={'cb': params, 'subdel': 1})
+            total_page = total_page - 1
             yield request
-        
-
-
-