6 år sedan · f8e1a09e3b
--- a/hg3535/spiders/other_saiguo.py
+++ b/hg3535/spiders/other_saiguo.py
@@ -1,137 +0,0 @@
 
				-import datetime
			
 
				-import re
			
 
				-
			
 
				-import scrapy
			
 
				-
			
 
				-from ..items import Hgsaiguo
			
 
				-
			
 
				-
			
 
				-class HgjieshuSpider(scrapy.Spider):
			
 
				-    name = 'other_saiguo'
			
 
				-    to_day = datetime.datetime.now()
			
 
				-    allowed_domains = ['hg3535z.com']
			
 
				-    custom_settings = {
			
 
				-        "ITEM_PIPELINES": {
			
 
				-            'hg3535.pipeline.saiguo.Jieshuqiupipeline': 300,
			
 
				-        },
			
 
				-        # 'LOG_LEVEL': 'DEBUG',
			
 
				-        # 'LOG_FILE': "../hg3535/log/saiguo{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
			
 
				-    }
			
 
				-
			
 
				-    def start_requests(self):
			
 
				-        for y in range(1, 5):
			
 
				-            url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/1'.format(y)
			
 
				-            yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={'pt': y})
			
 
				-
			
 
				-    def parse(self, response):
			
 
				-        if response.status == 200:
			
 
				-            pt = response.meta['pt']
			
 
				-            if pt == 1:
			
 
				-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
			
 
				-                # 获得所有比赛id对象
			
 
				-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
			
 
				-                # 所有比赛id列表
			
 
				-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
			
 
				-                temascore_list = []
			
 
				-                for score in tema_score:
			
 
				-                    # 正则匹配规则
			
 
				-                    p1 = r"\d{1,3}-\d{1,3}"
			
 
				-                    pattern1 = re.compile(p1)
			
 
				-                    try:
			
 
				-                        # 获取正则匹配结果
			
 
				-                        c = pattern1.findall(score.extract())[0]
			
 
				-                        temascore_list.append(c)
			
 
				-                    except:
			
 
				-                        c = ""
			
 
				-                        temascore_list.append(c)
			
 
				-                # 赛事id，赛事比元组列表
			
 
				-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
			
 
				-                for y in tema_tupe:
			
 
				-                    if y[1]:
			
 
				-                        item = Hgsaiguo()
			
 
				-                        item['id_score'] = y
			
 
				-                        item['pt'] = pt
			
 
				-                        yield item
			
 
				-            if pt == 2:
			
 
				-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
			
 
				-                # 获得所有比赛id对象
			
 
				-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
			
 
				-                # 所有比赛id列表
			
 
				-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
			
 
				-                temascore_list = []
			
 
				-                for score in tema_score:
			
 
				-                    # 正则匹配规则
			
 
				-                    p1 = r"\d{1,3}-\d{1,3}"
			
 
				-                    pattern1 = re.compile(p1)
			
 
				-                    try:
			
 
				-                        # 获取正则匹配结果
			
 
				-                        c = pattern1.findall(score.extract())[0]
			
 
				-                        temascore_list.append(c)
			
 
				-                    except:
			
 
				-                        c = ""
			
 
				-                        temascore_list.append(c)
			
 
				-                # 赛事id，赛事比元组列表
			
 
				-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
			
 
				-                for y in tema_tupe:
			
 
				-                    if y[1]:
			
 
				-                        item = Hgsaiguo()
			
 
				-                        item['id_score'] = y
			
 
				-                        item['pt'] = pt
			
 
				-                        yield item
			
 
				-
			
 
				-            if pt == 3:
			
 
				-                # 获得所有比赛获胜人，判断赛事是否结束
			
 
				-                # tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/text()')
			
 
				-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[4]/@title')
			
 
				-                # 获得所有比赛id对象
			
 
				-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
			
 
				-                # 所有比赛id列表
			
 
				-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
			
 
				-                temascore_list = []
			
 
				-                for score in tema_score:
			
 
				-                    # 正则匹配规则
			
 
				-                    # p1 = r"\d{1,3}-\d{1,3}"
			
 
				-                    # pattern1 = re.compile(p1)
			
 
				-                    try:
			
 
				-                        # 获取正则匹配结果
			
 
				-                        c = score.extract()
			
 
				-                        temascore_list.append(c)
			
 
				-                    except:
			
 
				-                        c = ""
			
 
				-                        temascore_list.append(c)
			
 
				-                # 赛事id，赛事比元组列表
			
 
				-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
			
 
				-                for y in tema_tupe:
			
 
				-                    if y[1]:
			
 
				-                        item = Hgsaiguo()
			
 
				-                        item['id_score'] = y
			
 
				-                        item['pt'] = pt
			
 
				-                        yield item
			
 
				-
			
 
				-            if pt == 4:
			
 
				-                tema_score = response.xpath('//div[@class="flex-wrap"]/../div[5]')
			
 
				-                # 获得所有比赛id对象
			
 
				-                tema_id = response.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
			
 
				-                # str.replace()
			
 
				-                # 所有比赛id列表
			
 
				-                temaid_list = [i.extract().replace('e-', "") for i in tema_id]
			
 
				-                temascore_list = []
			
 
				-                for score in tema_score:
			
 
				-                    # 正则匹配规则
			
 
				-                    p1 = r"\d{1,3}-\d{1,3}"
			
 
				-                    pattern1 = re.compile(p1)
			
 
				-                    try:
			
 
				-                        # 获取正则匹配结果
			
 
				-                        c = pattern1.findall(score.extract())[0]
			
 
				-                        temascore_list.append(c)
			
 
				-                    except:
			
 
				-                        c = ""
			
 
				-                        temascore_list.append(c)
			
 
				-                # 赛事id，赛事比元组列表
			
 
				-                tema_tupe = {(temaid_list[i], temascore_list[i]) for i in range(len(temaid_list))}
			
 
				-                for y in tema_tupe:
			
 
				-                    if y[1]:
			
 
				-                        item = Hgsaiguo()
			
 
				-                        item['id_score'] = y
			
 
				-                        item['pt'] = pt
			
 
				-                        yield item
			
--- a/hg3535/spiders/saiguo.py
+++ b/hg3535/spiders/saiguo.py
@@ -7,7 +7,6 @@ from lxml import etree
 
				 from ..items import Hgsaiguo
			
 
				 
			
 
				 
			
 
				-
			
 
				 class HgjieshuSpider(scrapy.Spider):
			
 
				     name = 'saiguo'
			
 
				     to_day = datetime.datetime.now()