| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- # -*- coding: utf-8 -*-
- # import jmespath
- import jsonpath
- import scrapy
- import json
- from scrapy.http import Request
- from scrapy_yzd.items import Hg3535Item, Today_all,Temaball
class Hg3535Spider(scrapy.Spider):
    """Scrape per-team goal over/under odds for in-play football events.

    ``parse`` reads the in-play football list and schedules one detail
    request per distinct event id.  ``parse_other`` turns each detail
    response into a single ``Temaball`` item holding the event header plus
    the full-time and first-half over/under odds for both teams.
    """

    name = 'hg3535_supplement'
    allowed_domains = ['hg3535z.com']
    # In-play menu: football in-play list URL.
    start_urls = ['http://hg3535z.com/odds2/d/getodds?sid=1&pt=4&ubt=am&pn=0&sb=2&dc=null&pid=0']
    custom_settings = {
        "ITEM_PIPELINES": {
            "scrapy_yzd.pipelines.Temaballpipeline": 300,
        }
    }
    # Alternative list URLs kept for reference:
    #   in-play tennis:   http://hg3535z.com/odds2/d/getodds?sid=3&pt=4&ubt=am&pn=0&sb=2&dc=null&pid=0
    #   basketball:       http://hg3535z.com/odds2/d/getodds?sid=2&pt=4&ubt=am&pn=0&sb=2&dc=null&pid=0
    #   today's football: http://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=am&pn=0&sb=2&dc=null&pid=0

    # Detail endpoint for one event; ``{}`` receives the event id.
    _DETAIL_URL = 'http://hg3535z.com/odds2/d/getamodds?eid={}&iip=true&ubt=am&isp=false'

    def parse(self, response):
        """Yield one detail ``Request`` per distinct event id in the list.

        The list payload is expected to look like
        ``payload['i-ot'][0]['egs']`` -> leagues, each with ``['c']['k']``
        (league id), ``['c']['n']`` (league name) and ``['es']`` (events),
        where ``event['i'][16]`` is the event id.  Nothing is yielded when
        the body is not JSON or no event id is found.
        """
        try:
            payload = json.loads(response.text)
        except ValueError:
            self.logger.warning("Event list at %s is not valid JSON", response.url)
            return

        # jsonpath.jsonpath() returns False (not an empty list) on no match.
        event_ids = jsonpath.jsonpath(payload, '$..i-ot[0]..egs..es..i[16]')
        self.logger.debug("Event ids found: %s", event_ids)
        if not event_ids:
            return

        # De-duplicate so each event is requested only once.
        for event_id in set(event_ids):
            url = self._DETAIL_URL.format(event_id)
            self.logger.debug("Scheduling detail request: %s", url)
            yield Request(url=url, callback=self.parse_other)

    @staticmethod
    def _fill_over_under(entry, market, prefix, odds, rules):
        """Copy one over/under market of *entry* into *odds* and *rules*.

        Reads ``entry['o'][market]['v']`` and stores positions 1/3 (the
        over/under lines) in *rules* and positions 5/7 (the over/under
        odds) in *odds*, under the keys ``<prefix>_size_big`` and
        ``<prefix>_size_small``.  If the market is missing or too short,
        all four values are stored as empty strings.
        """
        try:
            values = entry['o'][market]['v']
            big_rule, small_rule = values[1], values[3]
            big_odds, small_odds = values[5], values[7]
        except (KeyError, IndexError, TypeError):
            big_rule = small_rule = big_odds = small_odds = ""
        rules[prefix + '_size_big'] = big_rule
        odds[prefix + '_size_big'] = big_odds
        rules[prefix + '_size_small'] = small_rule
        odds[prefix + '_size_small'] = small_odds

    def parse_other(self, response):
        """Build a ``Temaball`` item from one event-detail response.

        Entries of ``['eg']['es']`` are dispatched on ``['pci']['ctid']``:
        0 carries the event header (id, teams, count, date, start time),
        12 the home-team goal over/under markets and 13 the guest-team
        ones (meanings taken from the original field names).  Yields at
        most one item; yields nothing when the payload is malformed, empty,
        or has no header entry.
        """
        self.logger.debug("Parsing event detail: %s", response)
        try:
            # Parse the body once instead of once per field.
            group = json.loads(response.text)['eg']
            entries = group['es']
            league_id = group['c']['k']
            league_name = group['c']['n']
        except (ValueError, KeyError, TypeError):
            self.logger.debug("No usable event data in %s", response.url)
            return
        if not entries:
            return

        full_dict, half_dict = {}, {}
        full_dict_rule, half_dict_rule = {}, {}
        header = None
        for entry in entries:
            try:
                ctid = entry['pci']['ctid']
            except (KeyError, TypeError):
                # Entries without a category id carry nothing we need.
                continue

            if ctid == 0:
                # Only the first header entry is used for the item.
                if header is None:
                    info = entry['i']
                    header = (
                        str(entry['k']),  # game id
                        info[0],          # home team
                        info[1],          # guest team
                        info[2],          # count
                        info[4],          # match date
                        info[5],          # start time
                    )
            elif ctid == 12:
                self._fill_over_under(entry, 'ou', 'home', full_dict, full_dict_rule)
                self._fill_over_under(entry, 'ou1st', 'half_home', half_dict, half_dict_rule)
            elif ctid == 13:
                self._fill_over_under(entry, 'ou', 'guest', full_dict, full_dict_rule)
                self._fill_over_under(entry, 'ou1st', 'half_guest', half_dict, half_dict_rule)

        if header is None:
            # Without a header entry there is no game id to attach odds to;
            # previously this case raised IndexError.
            self.logger.warning("No header entry (ctid == 0) in %s", response.url)
            return

        item = Temaball()
        item['league_id'] = league_id
        item['league_name'] = league_name
        (item['game_id'], item['team_home'], item['team_guest'],
         item['number'], item['data_game'], item['time_game']) = header
        item['full_data'] = full_dict
        item['half_data'] = half_dict
        item['full_data_rule'] = full_dict_rule
        item['half_data_rule'] = half_dict_rule
        yield item
|