1
0

2 Commits 5e85feb357 ... 88e2fb09cd

Autor SHA1 Mensagem Data
  juan 88e2fb09cd Merge branch 'master' of http://git.bocai108.com:10180/Odin/hgg070 há 6 anos atrás
  juan 4b2a38e3eb update há 6 anos atrás

BIN
hgg070_spider/__pycache__/settings.cpython-37.pyc


+ 1 - 1
hgg070_spider/main.py

@@ -4,7 +4,7 @@ from scrapy.cmdline import execute
 
 # print(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-execute(["scrapy", "crawl", "zuqiu"])
+execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "lanqiu"])
 # execute(["scrapy", "crawl", "guanjun"])
 # execute(["scrapy", "crawl", "wangqiu"])

BIN
hgg070_spider/spiders/__pycache__/lanqiu.cpython-37.pyc


BIN
hgg070_spider/spiders/__pycache__/zuqiu.cpython-37.pyc


+ 62 - 2
hgg070_spider/spiders/lanqiu.py

@@ -1,11 +1,71 @@
 # -*- coding: utf-8 -*-
 import scrapy
-
+import re
 
 class LanqiuSpider(scrapy.Spider):
+    """Crawl basketball league and game lists from m.hgg070.com."""
     name = 'lanqiu'
+    # NOTE(review): the entry below ends with '/', unlike the zuqiu spider's
+    # 'm.hgg070.com' -- confirm whether the trailing slash is intended.
     allowed_domains = ['m.hgg070.com/']
     start_urls = ['http://m.hgg070.com//']
+    # Matches league names that contain the word for basketball.
+    remath = re.compile("篮球")
+
+    def start_requests(self):
+        """Request the league list once per market type ('FT', then 'FU')."""
+        # Content-Length is left out on purpose so that it is not pinned to
+        # one exact payload size; the HTTP client is expected to supply it.
+        headers = {
+            'Accept': '*/*',
+            'Accept-Encoding': 'gzip, deflate',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Connection': 'keep-alive',
+            'Content-type': 'application/x-www-form-urlencoded',
+            'Cookie': '_ga=GA1.2.471918301.1572059707; _gid=GA1.2.2109447865.1572059707; _gat=1',
+            'Host': 'm.hgg070.com',
+            'Origin': 'http://m.hgg070.com',
+            'Referer': 'http://m.hgg070.com/',
+            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Mobile Safari/537.36'
+        }
+        url = "http://m.hgg070.com/app/member/get_league_list.php"
+        # 'FT' is today's market and 'FU' the early market.
+        for showtype in ('FT', 'FU'):
+            # NOTE(review): hard-coded session uid -- consider moving to settings.
+            data = {
+                'uid': '7554a670e92d06105fe567b75e5b80fe65e6e40167f4979c8d74ca5eaa461d4d',
+                'langx': 'zh-cn',
+                'ltype': '3',
+                'gtype': 'BK',
+                'showtype': showtype,
+                'sorttype': '',
+                'date': '',
+                'isP': ''
+            }
+            yield scrapy.FormRequest(url=url, formdata=data, callback=self.parse, headers=headers,
+                                     meta={"data": data}, dont_filter=True)
 
     def parse(self, response):
-        pass
+        """Request the game list of every basketball league in the response."""
+        base = response.meta["data"]
+        url = "http://m.hgg070.com/app/member/get_game_list.php"
+        # Only the date field differs: today ('FT') vs. early market ('FU').
+        dates = {"FT": "", "FU": "all"}
+        for le in response.xpath('//league'):
+            name = le.xpath('./league_name/text()').extract_first()
+            # extract_first() returns None when the node has no text.
+            if not name or not self.remath.search(name):
+                continue
+            lid = le.xpath('./league_id/text()').extract_first()
+            if lid is None or base["showtype"] not in dates:
+                continue
+            # Copy so that each request (and its meta) keeps its own league id.
+            data = dict(base, lid=lid, sorttype='league',
+                        date=dates[base["showtype"]])
+            self.logger.debug('game list request: %s', data)
+            yield scrapy.FormRequest(url=url, formdata=data, callback=self.detailball,
+                                     meta={"data": data}, dont_filter=True)
+
+    def detailball(self, response):
+        """Log the raw game-list response; no parsing is done here yet."""
+        self.logger.debug('game list response: %s', response.text)

+ 0 - 2
hgg070_spider/spiders/zuqiu.py

@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 import scrapy
-
-
 class ZuqiuSpider(scrapy.Spider):
     name = 'zuqiu'
     allowed_domains = ['m.hgg070.com']

+ 2 - 2
main.py

@@ -3,5 +3,5 @@ import os
 from scrapy.cmdline import execute
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-# execute(["scrapy", "crawl", "bangqiu"])
-execute(["scrapy", "crawl", "zuqiu"])
+execute(["scrapy", "crawl", "lanqiu"])
+# execute(["scrapy", "crawl", "zuqiu"])