# aqm / sports_scrapy
							import datetime
import re

import scrapy
from lxml import etree

# from ..items import Hgsaiguo


class HgjieshuSpider(scrapy.Spider):
    """Spider ``saiguo``: fetch sportsbook results pages and parse match results.

    One request is sent per results page id (1-4). Page 1 (football) and
    page 2 (basketball) are parsed; pages 3 (tennis) and 4 (baseball) are
    fetched but not parsed yet.

    NOTE(review): the parsing helpers extract every field but nothing is
    yielded as an item yet, so the pipeline configured in ``custom_settings``
    receives no data from this spider. The extracted locals are kept as the
    starting point for building items once the item schema is settled.
    """

    name = 'saiguo'
    # Evaluated once when the class is defined; only referenced by the
    # commented-out LOG_FILE setting below.
    to_day = datetime.datetime.now()
    # NOTE(review): the start URLs point at www.hg3535.cn, which this entry
    # does not cover; presumably the requests rely on dont_filter=True to
    # avoid being dropped as off-site -- confirm the intended domain.
    allowed_domains = ['hg3535z.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            'hg3535.pipeline.saiguo.Jieshuqiupipeline': 300,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': "../hg3535/log/saiguo{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }

    # Matches the "HH:MM"-like token inside the scraped time text.
    # Compiled once instead of once per match.
    _TIME_PATTERN = re.compile(r"\d{1,3}:\d{1,3}")

    def start_requests(self):
        """Yield one request per results page id (1-4), passed on as ``meta['pt']``."""
        for pt in range(1, 5):
            url = 'https://www.hg3535.cn/zh-cn/info-centre/sportsbook-info/results/{}/normal/1'.format(pt)
            yield scrapy.Request(url=url, callback=self.parse, dont_filter=True, meta={'pt': pt})

    def parse(self, response):
        """Dispatch a results page to the parser matching ``response.meta['pt']``.

        Non-200 responses are ignored. Always returns ``None``; no items or
        follow-up requests are produced.
        """
        if response.status != 200:
            return
        pt = response.meta['pt']
        if pt == 1:
            # Football results.
            self._parse_football(response)
        elif pt == 2:
            # Basketball results.
            self._parse_basketball(response)
        # pt == 3 (tennis) and pt == 4 (baseball) are not implemented yet.

    @staticmethod
    def _clean(text):
        """Return ``text`` with spaces and CRLF line breaks removed."""
        return text.replace(' ', '').replace('\r\n', '')

    def _iter_leagues(self, response, bar_class):
        """Yield ``(league_id, league_name, tree)`` for every league bar on the page.

        ``bar_class`` is the exact ``class`` attribute of the league header
        ``div``. ``league_id`` has its ``cmp-`` prefix removed and ``tree`` is
        the lxml tree of the league's ``dt-<id>`` container. Leagues whose
        container is missing are skipped with a warning, because
        ``etree.HTML`` cannot parse ``None``.
        """
        league_ids = response.xpath('//div[@class="{}"]/@id'.format(bar_class)).extract()
        league_names = response.xpath(
            '//div[@class="{}"]/span[@class="comp-txt"]/text()'.format(bar_class)
        ).extract()
        for league_id, league_name in zip(league_ids, league_names):
            fragment = response.xpath('//div[@id="dt-{}"]'.format(league_id)).extract_first()
            if fragment is None:
                self.logger.warning('No result container found for league %s', league_id)
                continue
            yield league_id.replace('cmp-', ''), league_name, etree.HTML(fragment)

    def _match_basics(self, raw_match_id, title, raw_time):
        """Return ``(match_id, h_name, a_name, stime)`` for one match, or ``None``.

        ``None`` is returned (after a warning) when the title cannot be split
        into a home and an away team. ``stime`` is ``None`` when no time token
        is found in ``raw_time``.
        """
        # Spaces are removed before splitting on '-'.
        # NOTE(review): a team name that itself contains '-' is split too;
        # only the first two parts are used, as before.
        teams = title.replace(' ', '').split('-')
        if len(teams) < 2:
            self.logger.warning(
                'Skipping match %s: cannot split teams from title %r', raw_match_id, title
            )
            return None
        # Home team, away team.
        h_name, a_name = teams[0], teams[1]
        print(h_name, a_name)
        found = self._TIME_PATTERN.search(raw_time)
        stime = found.group(0) if found else None
        return raw_match_id.replace('e-', ''), h_name, a_name, stime

    def _parse_football(self, response):
        """Extract football results (scores, time, sub-market results) per match."""
        for league_id, league_name, data in self._iter_leagues(response, 'rt-l-bar football'):
            # Team names.
            team_names = data.xpath('//div[@class="rt-event"]/@title')
            # Full-time scores.
            f_scores = data.xpath('.//div[contains(@class, "rt-ft ")]')
            # First-half scores.
            h_scores = data.xpath('.//div[contains(@class, "rt-ht ")]')
            # Start times.
            stimes = data.xpath('//div[@class="rt-event"]/../div[1]/span/text()')
            match_ids = data.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
            odd_datas = data.xpath('//div[contains(@class, "rt-sub ")]/table/tbody[2]')
            # zip() stops at the shortest list, so a length mismatch between
            # the parallel lists no longer raises IndexError mid-page.
            matches = zip(match_ids, team_names, h_scores, f_scores, stimes, odd_datas)
            for raw_match_id, title, h_node, f_node, raw_time, odd_data in matches:
                basics = self._match_basics(raw_match_id, title, raw_time)
                if basics is None:
                    continue
                match_id, h_name, a_name, stime = basics
                # First-half and full-time score text.
                h_score = self._clean(h_node.xpath('string(.)'))
                f_score = self._clean(f_node.xpath('string(.)'))
                # Sub-market names and their results.
                odd_names = odd_data.xpath('.//tr/td[2]')
                odd_plays = odd_data.xpath('.//tr/td[3]/span')
                for name_node, play_node in zip(odd_names, odd_plays):
                    name = name_node.text
                    plays = play_node.xpath('text()')
                    if not plays:
                        # Result cell without text: nothing to record.
                        continue
                    if len(plays) == 2:
                        play = '{}&&{}'.format(plays[0], plays[1])
                    else:
                        play = plays[0]

    def _parse_basketball(self, response):
        """Extract basketball results (scores, time, per-quarter and sub-market data)."""
        for league_id, league_name, data in self._iter_leagues(response, 'rt-l-bar sportHasQuater'):
            # Team names.
            team_names = data.xpath('//div[@class="rt-event"]/@title')
            # Full-time scores.
            f_scores = data.xpath('.//div[@class="rt-qft"]')
            # First-half scores.
            h_scores = data.xpath('.//div[@class="rt-qt1"]')
            # Second-half scores.
            x_scores = data.xpath('.//div[@class="rt-qt2"]')
            # Start times.
            stimes = data.xpath('//div[@class="rt-event"]/../div[1]/span/text()')
            match_ids = data.xpath('//div[@class="flex-wrap"]/../div[1]/@id')
            odd_datas = data.xpath('//div[contains(@class, "rt-sub ")]/table/tbody[2]')
            # zip() stops at the shortest list, so a length mismatch between
            # the parallel lists no longer raises IndexError mid-page.
            matches = zip(match_ids, team_names, h_scores, f_scores, x_scores, stimes, odd_datas)
            for raw_match_id, title, h_node, f_node, x_node, raw_time, odd_data in matches:
                basics = self._match_basics(raw_match_id, title, raw_time)
                if basics is None:
                    continue
                match_id, h_name, a_name, stime = basics
                # First-half, full-time and second-half score text.
                h_score = self._clean(h_node.xpath('string(.)'))
                f_score = self._clean(f_node.xpath('string(.)'))
                x_score = self._clean(x_node.xpath('string(.)'))
                # Sub-market rows; the first row holds the per-quarter table.
                child_data = odd_data.xpath('./tr')
                for row_index, row in enumerate(child_data):
                    if row_index == 0:
                        h_datas = row.xpath('.//td/table/tbody/tr[3]/td[@class="r-odds"]')
                        a_datas = row.xpath('.//td/table/tbody/tr[4]/td[@class="r-odds"]')
                        for h_cell, a_cell in zip(h_datas, a_datas):
                            # Home / away per-quarter score; .text is None
                            # for an empty cell, so fall back to ''.
                            h_data = self._clean(h_cell.text or '')
                            a_data = self._clean(a_cell.text or '')
                    else:
                        names = row.xpath('.//td[contains(@class, "r-bt ")]/text()')
                        plays = row.xpath('.//td[@class="r-odds"]/span[@class="prop"]/text()')
                        if not names or not plays:
                            # Row without a name or a result: skip it.
                            continue
                        # Sub-market name and its result.
                        child_name = self._clean(names[0])
                        child_play = plays[0]