1
0

3 Incheckningar d1a3054766 ... 8f89d25a38

Upphovsman SHA1 Meddelande Datum
  Your Name 8f89d25a38 Merge remote-tracking branch 'origin/master' 6 år sedan
  Your Name 228ed9a847 修改 6 år sedan
  Your Name 5cf840783d 更新 6 år sedan

+ 4 - 1
.gitignore

@@ -6,7 +6,10 @@ bin-release/
 .idea
 # Other files and folders
 .settings/
-
+__pycache__
+pipelines/__pycache__
+spiders/__pycache__
+utils/__pycache__
 # Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties`
 # should NOT be excluded as they contain compiler settings and other important
 # information for Eclipse / Flash Builder.

+ 2 - 41
hgg070_spider/items.py

@@ -7,45 +7,6 @@
 
 import scrapy
 
-class ZuqiuItem(scrapy.Field):
-    id=scrapy.Field()
-    league=scrapy.Field()
-    team_h=scrapy.Field()
-    team_c = scrapy.Field()
-    #让球
-    ior_RTS2Y=scrapy.Field()
-    ior_RTS2N = scrapy.Field()
-    #
-    ior_REH = scrapy.Field()
-    ior_REC = scrapy.Field()
-
-    ior_ROUC = scrapy.Field()
-    ior_HROUH = scrapy.Field()
-    ratio_rouo = scrapy.Field()
-    ratio_rouu = scrapy.Field()
 
-    # ior_RMH = scrapy.Field()
-    # ior_RMC = scrapy.Field()
-    # ior_RMN = scrapy.Field()
-    #
-    # ior_RH2C1 = scrapy.Field()
-    # ior_RH3C2 = scrapy.Field()
-    # ior_CRGN = scrapy.Field()
-    # ior_RH4C4 = scrapy.Field()
-    #
-    # ior_RH1C0 = scrapy.Field()
-    # ior_RH2C0 = scrapy.Field()
-    # ior_RH2C1 = scrapy.Field()
-    # ior_RH3C0 = scrapy.Field()
-    # ior_RH3C1 = scrapy.Field()
-    # ior_RH3C2 = scrapy.Field()
-    # ior_RH4C0 = scrapy.Field()
-    # ior_RH4C1 = scrapy.Field()
-    #
-    # ior_RH4C2 = scrapy.Field()
-    # ior_RH4C3 = scrapy.Field()
-    # ior_RT01 = scrapy.Field()
-    # ior_RT23 = scrapy.Field()
-    # ior_RT46 = scrapy.Field()
-    # ratio_rouho = scrapy.Field()
-    # ratio_rouhu = scrapy.Field()
+class ZuqiuItem(scrapy.Field):
+    all = scrapy.Field()

+ 4 - 0
hgg070_spider/pipelines/__init__.py

@@ -0,0 +1,4 @@
+# This package contains the item pipelines of your Scrapy project.
+#
+# Please refer to the documentation for information on how to create and manage
+# your item pipelines.

+ 20 - 13
hgg070_spider/pipelines/zuqiu.py

@@ -1,18 +1,25 @@
-from twisted.internet import defer,reactor
-from ..utils.helper import Helper
+
+
+# from twisted.internet import defer,reactor
+# from ..utils.helper import Helper
+
 
 class ZuqiuPipeline(object):
     def open_spider(self, spider):
         pass
-    @defer.inlineCallbacks
-    def process_item(self,item,spider):
-        out=defer.Deferred()
-        reactor.callInThread(self._do_calculation,item,out)
-        yield out
+    # @defer.inlineCallbacks
+    # def process_item(self,item,spider):
+    #     out=defer.Deferred()
+    #     reactor.callInThread(self._do_calculation,item,out)
+    #     yield out
+
+    # def _do_calculation(self,item,out):
+    #     pass
 
-    def _do_calculation(self,item,out):
-        param={"id":item["id"],"league":item["league"],"team_h":item["team_h"],
-               "team_c":item["team_c"],"ior_RTS2Y":item["ior_RTS2Y"],"ior_RTS2N":item["ior_RTS2N"],
-               "ior_REH":item["ior_REH"],"ior_REC":item["ior_REC"],"ior_ROUC":item["ior_ROUC"],
-               "ior_HROUH":item["ior_HROUH"],"ratio_rouo":item["ratio_rouo"],"ratio_rouu":item["ratio_rouu"]}
-        data=Helper.async_post("")
+    def process_item(self, item, spider):
+        all = item['all']
+        team_h, team_c = all['team_h'], all['team_c']
+        league, league_id = all['league'], all['league']
+        datetime, re_time = all['datetime'], all['re_time']
+        match_id = all['gid']
+        print(league, team_h, team_c, datetime, match_id, league_id)

+ 52 - 8
hgg070_spider/spiders/zuqiu.py

@@ -1,8 +1,23 @@
 # -*- coding: utf-8 -*-
+import logging
+import lxml
+
 import scrapy
+
+from items import ZuqiuItem
+
+
 class ZuqiuSpider(scrapy.Spider):
     name = 'zuqiu'
     allowed_domains = ['m.hgg070.com']
+    custom_settings = {
+        "ITEM_PIPELINES": {
+            "hgg070_spider.pipelines.zuqiu.ZuqiuPipeline": 200,
+        },
+        # 'LOG_LEVEL': 'DEBUG',
+        # 'LOG_FILE': cpath + "/log/sports_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
+    }
+
     headers = {
         'Accept': '*/*',
         'Accept-Encoding': 'gzip, deflate',
@@ -17,15 +32,15 @@ class ZuqiuSpider(scrapy.Spider):
         'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
     }
 
-    # 读取今日足球
     def start_requests(self):
         url = "http://m.hgg070.com/app/member/get_league_list.php"
-        h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', '', '131')]
+        h_types = [('FT', '', '130'), ('FU', 'P', '131'), ('FU', "", '130'), ('FU', 'P', '131')]
         for h_type in h_types:
-            show_type, isp, length = h_type
+            # show_type, isp, length = h_type
+            show_type, isp, length = h_types[3]
             self.headers['Content-Length'] = length
             from_data = {
-                'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
+                'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
                 'langx': 'zh-cn',
                 'ltype': '3',
                 'gtype': 'FT',
@@ -38,6 +53,9 @@ class ZuqiuSpider(scrapy.Spider):
                                      meta={'showtype': show_type, 'isp': isp}, dont_filter=True)
 
     def parse(self, response):
+        if response.status == 400:
+            print(response.status)
+            print('parse', response.url)
         leagues = response.xpath('//serverresponse/game/league')
         url = 'http://m.hgg070.com/app/member/get_game_list.php'
         if leagues:
@@ -58,7 +76,7 @@ class ZuqiuSpider(scrapy.Spider):
             for league in leagues:
                 lid = league.xpath('.//league_id/text()').extract_first()
                 from_data = {
-                    'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
+                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -70,8 +88,14 @@ class ZuqiuSpider(scrapy.Spider):
                 }
                 yield scrapy.FormRequest(url=url, formdata=from_data, callback=self.parse_match, headers=self.headers,
                                          meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
+        else:
+            print('未获取到联赛id')
+            return
 
     def parse_match(self, response):
+        if response.status == 400:
+            print(response.status)
+            print('parse_match', response.url)
         url = 'http://m.hgg070.com/app/member/get_game_more.php'
         showtype = response.meta['showtype']
         isp = response.meta['isp']
@@ -91,7 +115,7 @@ class ZuqiuSpider(scrapy.Spider):
         if gids:
             for gid in gids:
                 from_data = {
-                    'uid': '4f6573b99cc31f7be1579b063888a8a2bcda122ce94228510cda9e1bb32a477f',
+                    'uid': '013dc3a00cbd488238236010f78ab4a41af7e6ff05ceb96bc0854b60807a42eb',
                     'langx': 'zh-cn',
                     'ltype': '3',
                     'gtype': 'FT',
@@ -104,5 +128,25 @@ class ZuqiuSpider(scrapy.Spider):
                                          meta={'showtype': showtype, 'isp': isp}, dont_filter=True)
 
     def parse_odds(self, response):
-        print(response.text)
-        pass
+        # print(response.text)
+        # game_lists = []
+        if response.status == 400:
+            print(response.status)
+            print('parse_odds', response.url)
+        game = response.xpath('//serverresponse/game')[0]
+        logger = logging.getLogger(__name__)
+        if game:
+            game_odds = {}
+            gopen = game.xpath('//game/gopen/text()').extract_first()
+            if gopen == 'Y':
+                game = lxml.etree.fromstring(game.extract())
+                for i in game.getchildren():
+                    if i.text == None:
+                        game_odds[i.tag] = ""
+                    else:
+                        game_odds[i.tag] = i.text
+            else:
+                logger.info('gopen == "N", 详细赔率盘口未开启')
+            item = ZuqiuItem()
+            item['all'] = game_odds
+            yield item