1
0

2 コミット aee065a0cc ... 389136f91a

作者 SHA1 メッセージ 日付
  Your Name 389136f91a 新增网球滚球爬虫 6 年 前
  Your Name 9a518a0082 新增足球滚球爬虫 6 年 前

+ 2 - 2
hgg070_spider/main.py

@@ -5,7 +5,7 @@ from scrapy.cmdline import execute
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 # execute(["scrapy", "crawl", "zuqiu"])
-execute(["scrapy", "crawl", "lanqiu"])
+# execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "lq_sports"])
 # execute(["scrapy", "crawl", "guanjun"])
@@ -14,7 +14,7 @@ execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "bangqiu"])
 # execute(["scrapy", "crawl", "roll_zuqiu"]) # 滚球足球 回来要解开这个注释 其他全部解封
 # execute(["scrapy", "crawl", "roll_lanqiu"]) #滚球篮球
-# execute(["scrapy", "crawl", "roll_wangqiu"]) #滚球网球
+execute(["scrapy", "crawl", "roll_wangqiu"]) #滚球网球
 # execute(["scrapy", "crawl", "roll_bangqiu"])  # 滚球棒球
 # execute(["scrapy", "crawl", "saiguo"]) #滚球id结束时间更新状态
 # execute(["scrapy", "crawl", "jieshu"]) #滚球id结束时间更新状态

+ 100 - 0
hgg070_spider/pipelines/roll_wangqiu.py

@@ -0,0 +1,100 @@
+import logging
+from twisted.internet import defer, reactor
+from ..utils.helper import Helper
+from ..settings import LEAGUE_URL,MATCH_URL
+import pymongo,time
+from ..settings import M_HOST,M_USER,M_PASSWORD,M_POST,M_DB,ODDS_URL
+
+
+class RollPipeline(object):
+    """Scrapy item pipeline that posts league, match and odds payloads for each item.
+
+    The submissions are executed in a reactor thread-pool thread
+    (``reactor.callInThread``); ``process_item`` waits on a Deferred that the
+    worker fires once it has finished.
+    """
+
+    def open_spider(self, spider):
+        """Open the MongoDB connection when the spider starts."""
+        self.mongo = pymongo.MongoClient(host=M_HOST, username=M_USER, password=M_PASSWORD, port=M_POST,
+                                         authSource=M_DB)
+        self.db = self.mongo[M_DB]
+
+    def close_spider(self, spider):
+        """Close the MongoDB connection opened in ``open_spider``, if any."""
+        mongo = getattr(self, 'mongo', None)
+        if mongo is not None:
+            mongo.close()
+
+    @defer.inlineCallbacks
+    def process_item(self, item, spider):
+        """Submit ``item`` from a worker thread and pass it on when the worker is done.
+
+        Returns (through the inlineCallbacks Deferred) the unchanged ``item``.
+        """
+        logger = logging.getLogger(__name__)
+        logger.info("进入管道")
+        out = defer.Deferred()
+        reactor.callInThread(self._do_calculation, item, out)
+        yield out
+        # Hand the item back so later pipeline stages still receive it.
+        defer.returnValue(item)
+
+    def _do_calculation(self, item, out):
+        """Worker-thread entry point: submit ``item`` and then fire ``out``.
+
+        ``out`` is always fired, even when the submission raises; otherwise the
+        Deferred awaited by ``process_item`` would never complete.
+        """
+        try:
+            self._submit(item)
+        except Exception:
+            # Best effort: record the failure and let the pipeline continue.
+            logging.exception("管道处理异常")
+        finally:
+            # Deferreds must be fired from the reactor thread, not from this worker thread.
+            reactor.callFromThread(out.callback, item)
+
+    def _submit(self, item):
+        """Post the league, the match and the odds for ``item``.
+
+        After that first pass the league/match post is repeated with ``lg_id``
+        taken from ``item['id']`` instead of ``item['match_id']``.
+        """
+        league_name = item['league']
+        uuid = Helper.genearte_uuid(league_name)
+        show_type = item['showtype']
+        # All four flags start at 0 so each of them is bound whichever branch runs.
+        is_rollball, is_today, is_morningplate, is_stringscene = 0, 0, 0, 0
+        if show_type == "FT":
+            is_today = 1
+        elif show_type == "FU":
+            is_morningplate = 1
+        elif show_type == "RB":
+            is_rollball = 1
+        else:
+            is_stringscene = 1
+        league_key = ["name_chinese", "kind", "match_mode", "if_stop", "last_time", "lg_id", "source", "uuid",
+                      "is_rollball", "is_today", "is_morningplate", "is_stringscene"]
+
+        # First pass: league -> match -> odds, keyed by item['match_id'].
+        league_value = [league_name, "1", "1", "0", item['datetime'], item['match_id'], "hgg070", uuid,
+                        is_rollball, is_today, is_morningplate, is_stringscene]
+        # Match payload
+        childer = dict(zip(league_key, league_value))
+        # League payload
+        # NOTE(review): game_code is "lq" although this pipeline is wired to the
+        # roll_wangqiu spider -- confirm the intended code.
+        obj = {"game_code": "lq", "title": "league", "source": "hgg070", "data": [childer]}
+        res = Helper.async_post(LEAGUE_URL, obj)
+        if res:
+            if res.get('status') == 1:
+                logging.warning("联赛提交成功,{}1".format(res))
+                # Submit the match
+                lres = Helper.async_post(MATCH_URL, childer)
+                # Guard against an empty response before reading its status.
+                if lres and lres.get('status') == 1:
+                    logging.warning("赛事提交成功,{}2".format(lres))
+                    self._submit_odds(item, uuid)
+            # Kept from the original flow: once the first league post returned a
+            # response, the second pass is sent with is_stringscene forced to 1.
+            is_stringscene = 1
+
+        # Second pass: league -> match again, keyed by item['id'].
+        # NOTE(review): this looks like a leftover copy of the first pass; it is kept
+        # because dropping the extra submission would change what the endpoints receive.
+        league_value = [league_name, "1", "1", "0", item['datetime'], item['id'], "hgg070", uuid, is_rollball, is_today,
+                        is_morningplate, is_stringscene]
+        # Match payload
+        childer = dict(zip(league_key, league_value))
+        # League payload
+        obj = {"game_code": "lq", "title": "league", "source": "hgg070", "data": [childer]}
+        res = Helper.async_post(LEAGUE_URL, obj)
+
+        if res:
+            if res.get('status') == 1:
+                logging.warning("联赛提交成功,{}".format(res))
+                # Submit the match
+                lres = Helper.async_post(MATCH_URL, childer)
+                if lres and lres.get('status') == 1:
+                    # The message now names the match submission and shows its own response.
+                    logging.warning("赛事提交成功,{}".format(lres))
+                else:
+                    logging.warning("赛事提交失败,{}".format(lres))
+            else:
+                logging.warning("联赛提交失败,{}".format(res))
+        else:
+            logging.warning("联赛提交失败,{}".format(res))
+
+    def _submit_odds(self, item, uuid):
+        """Build the odds payload for ``item`` and post it to ``ODDS_URL``."""
+        # Parlay flag derived from the item's isP marker
+        if item['isP'] == 'P':
+            ris_stringscene = 1
+        else:
+            ris_stringscene = 0
+        # Current local time, formatted as a string
+        utime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+        odds_key = ["game_code", "title", "match_id", "lg_id", "data", "source", "odds_only", "tag", "uuid",
+                    "is_stringscene", "utime", "pt", 'match_identity']
+        odds_value = ["lq", "odds", item['match_id'], item['league_id'], item["content"], "hgg070", [],
+                      item['more_count'], uuid,
+                      ris_stringscene, utime, item['isP'], item["match_identity"]]
+        odderlist = dict(zip(odds_key, odds_value))
+        res = Helper.async_post(ODDS_URL, odderlist)
+        if res:
+            if res.get('status') == 1:
+                logging.warning("赔率提交成功,{}3".format(res))
+            else:
+                logging.warning("赔率提交失败,{}4".format(res))
+        else:
+            logging.warning("赔率提交失败,{}5".format(res))
+

+ 9 - 1
hgg070_spider/settings.py

@@ -114,4 +114,12 @@ MATCHWARN = "http://stadmin.bocai108.com:19093/setMatchWarn"
 R_HOST = '192.168.2.200'
 R_POST = 6379
 R_DB = 1
-R_PASSWORD = 123456
+R_PASSWORD = 123456
+
+REACTOR_THREADPOOL_MAXSIZE = 40
+# LOG_LEVEL = 'INFO'
+COOKIES_ENABLED = False
+RETRY_ENABLED = False
+DOWNLOAD_TIMEOUT = 10
+REDIRECT_ENABLED = False
+CONCURRENT_ITEMS = 1000

+ 118 - 0
hgg070_spider/spiders/roll_wangqiu.py

@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+import json
+import logging
+# import lxml
+import scrapy
+import xmltodict
+
+# from ..items import ZuqiuItem
+
+
+class WangqiuSpider(scrapy.Spider):
+    """Spider for rolling-ball (showtype "RB") tennis (gtype "TN") data on m.hg0088.com.
+
+    Request chain: league list -> game list per league -> detail ("more") page
+    per game. ``parse_odds`` does not produce items yet.
+    """
+    name = 'roll_wangqiu'
+    allowed_domains = ['m.hg0088.com']
+    custom_settings = {
+        "ITEM_PIPELINES": {
+            "hgg070_spider.pipelines.roll_wangqiu.RollPipeline": 200,
+        },
+        # 'LOG_LEVEL': 'DEBUG',
+        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
+    }
+
+    headers = {
+        'Host': 'm.hg0088.com',
+        'Connection': 'keep-alive',
+        # 'Content-Length': '89', # commented out for hg0088
+        'Origin': 'https://m.hg0088.com',
+        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
+        'Content-type': 'application/x-www-form-urlencoded',
+        'Accept': '*/*',
+        'Sec-Fetch-Site': 'same-origin',
+        'Sec-Fetch-Mode': 'cors',
+        'Referer': 'https://m.hg0088.com/',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'Cookie': '_ga=GA1.2.219750064.1572659333; box4pwd_notshow=Y; _gid=GA1.2.2031225008.1572829846; _gat=1'
+    }
+
+    # Form fields sent with every request; kept in one place so the uid only
+    # has to be changed once.
+    base_form = {
+        'uid': 'yv8vy3csm22383986l393491',
+        'langx': 'zh-cn',
+        'ltype': '4',
+        'gtype': 'TN',
+        'showtype': 'RB',
+    }
+
+    def start_requests(self):
+        """Request the league list."""
+        url = "https://m.hg0088.com/app/member/get_league_list.php"
+        from_data = dict(self.base_form, sorttype='', date='', isP='')
+        yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse, headers=self.headers)
+
+    def parse(self, response):
+        """Request the game list of every league found in the league-list response."""
+        leagues = response.xpath('//serverresponse/game/league')
+        if not leagues:
+            self.logger.warning('未获取到联赛id')
+            return
+        url = 'https://m.hg0088.com/app/member/get_game_list.php'
+        for league in leagues:
+            lid = league.xpath('.//league_id/text()').extract_first()
+            if lid is None:
+                # A form value of None cannot be encoded; skip leagues without an id.
+                continue
+            from_data = dict(self.base_form, lid=lid, sorttype='', date='', isP='')
+            yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
+                                     dont_filter=True)
+
+    def parse_match(self, response):
+        """Request the detail page of every game found in the game-list response.
+
+        The game's ``more_count`` text is forwarded as ``meta['tag']``.
+        """
+        url = 'https://m.hg0088.com/app/member/get_game_more.php'
+        # Read gid and more_count from the same <game> node so the two values
+        # always belong to the same game, even if one game lacks more_count.
+        for game in response.xpath('//serverresponse/game'):
+            gid = game.xpath('./gid/text()').extract_first()
+            if gid is None:
+                continue
+            tag = game.xpath('./more_count/text()').extract_first()
+            from_data = dict(self.base_form, date='', isP='', gid=gid)
+            yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_odds, headers=self.headers,
+                                     meta={'tag': tag}, dont_filter=True)
+
+    def parse_odds(self, response):
+        """Placeholder callback for the game detail response; yields no items yet."""
+        # TODO: the earlier draft parsed response.text with xmltodict, took
+        # serverresponse/game (its first entry when it is a list) and, when
+        # gopen == 'Y', yielded an item carrying the game data plus 'index' and
+        # 'tag' from response.meta. Note that parse_match only passes 'tag' in
+        # meta, so 'index' would have to be supplied before that draft can work.
+        self.logger.debug('parse_odds reached for %s (tag=%s)', response.url, response.meta.get('tag'))
+