liansai.py

# -*- coding: utf-8 -*-
import datetime
import json

import scrapy
# from scrapy.http import Request

from ..items import Liansai


class LanqiulsSpider(scrapy.Spider):
    name = 'liansai'
    # used only by the (commented-out) dated LOG_FILE name below
    to_day = datetime.datetime.now()
    allowed_domains = ['hg3535z.com']
    custom_settings = {
        "ITEM_PIPELINES": {
            'hg3535.pipelines.Liansaipipeline': 300,
        },
        # 'LOG_LEVEL': 'DEBUG',
        # 'LOG_FILE': "../hg3535/log/liansai_{}_{}_{}.log".format(to_day.year, to_day.month, to_day.day)
    }

    def start_requests(self):
        gj_list = ['am', 'or']
        # enumerate every sid/pt/ubt combination of the competitions endpoint
        for y in range(1, 5):
            for i in range(1, 5):
                for z in gj_list:
                    url = ('https://hg3535z.com/odds2/d/getcomps?sid=' + str(y) +
                           '&pt=' + str(i) + '&ubt=' + z + '&dc=null&pn=0&pid=0')
                    yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)
                    # yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        try:
            body = json.loads(response.text)
            datas = body['gs']
        except (ValueError, KeyError):
            datas = []
        if datas:
            # sport code ('s'), shared by every league in this response
            ball = body['s']
            for data in datas:
                # region id (e.g. Europe)
                area_id = data['gid']
                # region name (e.g. Europe)
                area_name = data['gn']
                new_data = data['fc']
                if new_data:
                    for i in new_data:
                        item = Liansai()
                        st_league = i["id"]
                        name_chinese = i['nm']
                        item['area_id'] = area_id
                        item['area_name'] = area_name
                        item['st_league'] = st_league
                        item['name_chinese'] = name_chinese
                        item['ball'] = ball
                        yield item
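
# --- Reference only (not part of liansai.py): a minimal sketch of the Liansai
# --- item imported from ..items above, assuming it declares exactly the fields
# --- assigned in parse(); the real hg3535/items.py may define more.
#
# import scrapy
#
# class Liansai(scrapy.Item):
#     area_id = scrapy.Field()
#     area_name = scrapy.Field()
#     st_league = scrapy.Field()
#     name_chinese = scrapy.Field()
#     ball = scrapy.Field()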