guanjun.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # -*- coding: utf-8 -*-
  2. import copy
  3. import datetime
  4. import json
  5. import scrapy
  6. from scrapy.http import Request
  7. from .. items import Guanjun
  8. class LqguanjunSpider(scrapy.Spider):
  9. name = 'guanjun'
  10. allowed_domains = ['hg3535z.com']
  11. to_day = datetime.datetime.now()
  12. custom_settings = {
  13. "ITEM_PIPELINES": {
  14. 'hg3535.pipelines.Guanjunpipeline': 300,
  15. },
  16. 'LOG_LEVEL': 'DEBUG',
  17. 'LOG_FILE': "../hg3535/log/guanjun_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
  18. }
  19. def start_requests(self):
  20. for z in range(1, 5):
  21. # for y in range(1, 4):
  22. # for x in range(0, 4):
  23. # url = 'https://hg3535z.com/odds2/d/getodds?sid=' + str(z) + '&pt=' + str(y) + '&ubt=or&pn=' + str(
  24. # x) + '&sb=2&dc=null&pid=0'
  25. # # yield Request(url=url, callback=self.parse, meta={'pt': y, 'sid': z}, dont_filter=True)
  26. # # https://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=am&pn=0&sb=2&dc=null&pid=0
  27. # # https://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=or&pn=0&sb=2&dc=null&pid=0
  28. # yield Request(url=url, callback=self.parse, meta={'pt': y, 'sid': z}, dont_filter=True)
  29. # for y in range(1, 4):
  30. for x in range(0, 4):
  31. url = 'https://hg3535z.com/odds2/d/getodds?sid=' + str(z) + '&pt=1&ubt=or&pn=' + str(
  32. x) + '&sb=2&dc=null&pid=0'
  33. # yield Request(url=url, callback=self.parse, meta={'pt': y, 'sid': z}, dont_filter=True)
  34. # https://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=am&pn=0&sb=2&dc=null&pid=0
  35. # https://hg3535z.com/odds2/d/getodds?sid=1&pt=1&ubt=or&pn=0&sb=2&dc=null&pid=0
  36. yield Request(url=url, callback=self.parse, meta={'pt': 1, 'sid': z}, dont_filter=True)
  37. def parse(self, response):
  38. try:
  39. datas = json.loads(response.text)
  40. except:
  41. datas = ""
  42. try:
  43. results = datas['n-ot']['egs']
  44. except:
  45. results = ""
  46. try:
  47. pt = copy.copy(response.meta['pt'])
  48. except:
  49. pt = 0
  50. try:
  51. ball = datas['n-ot']['s']['n']
  52. except:
  53. ball = ""
  54. if results:
  55. for result in results:
  56. # 联赛id
  57. league_id = result['c']['k']
  58. # 联赛名
  59. league_name = result['c']['n']
  60. new_results = result['es']
  61. for new_result in new_results:
  62. data_game = new_result['i'][4]
  63. time_game = new_result['i'][5]
  64. new_league_name = new_result['egn'].replace('\t', '')
  65. for i in new_result['n-o']:
  66. new_champion = i['mn']
  67. for y in i['o']:
  68. item = Guanjun()
  69. game_id = str(new_result['i'][16])
  70. tema_home = y[0]
  71. champion_team = float(y[2]) - 1
  72. item['league_name'] = league_name
  73. item['tema_home'] = tema_home
  74. item['league_id'] = league_id
  75. item['game_id'] = game_id
  76. item['data_game'] = data_game
  77. item['time_game'] = time_game
  78. item['new_league_name'] = new_league_name
  79. item['champion_team'] = champion_team
  80. item['new_champion'] = new_champion
  81. item['pt'] = pt
  82. item['ball'] = ball
  83. yield item