# timerStartDaily.py
# Scheduler script: waits for any previous scrapy run to finish, launches the
# daily spider with a resumable JOBDIR, and drives the 'roll'/'notroll' crawl loops.
  1. import time
  2. import os
  3. import sys
  4. # print(sys.argv[1])
  5. can = sys.argv[0]
  6. # tag = sys.argv[2]
  7. # print(type(can))
  8. # 文件timerStartDaily.py
  9. from scrapy import cmdline
  10. import datetime
  11. import time
  12. import shutil
  13. import os
  14. recoderDir = r"crawls" # 这是为了爬虫能够续爬而创建的目录,存储续爬需要的数据
  15. checkFile = "isRunning.txt" # 爬虫是否在运行的标志
  16. startTime = datetime.datetime.now()
  17. print(f"startTime = {startTime}")
  18. i = 0
  19. miniter = 0
  20. while 1:
  21. isRunning = os.path.isfile(checkFile)
  22. if not isRunning: # 爬虫不在执行,开始启动爬虫
  23. # 在爬虫启动之前处理一些事情,清掉JOBDIR = crawls
  24. isExsit = os.path.isdir(recoderDir) # 检查JOBDIR目录crawls是否存在
  25. print(f"mySpider not running, ready to start. isExsit:{isExsit}")
  26. if isExsit:
  27. removeRes = shutil.rmtree(recoderDir) # 删除续爬目录crawls及目录下所有文件
  28. print(f"At time:{datetime.datetime.now()}, delete res:{removeRes}")
  29. else:
  30. print(f"At time:{datetime.datetime.now()}, Dir:{recoderDir} is not exsit.")
  31. time.sleep(20)
  32. clawerTime = datetime.datetime.now()
  33. waitTime = clawerTime - startTime
  34. print(f"At time:{clawerTime}, start clawer: mySpider !!!, waitTime:{waitTime}")
  35. cmdline.execute('scrapy crawl mySpider -s JOBDIR=crawls/storeMyRequest'.split())
  36. break # 爬虫结束之后,退出脚本
  37. else:
  38. print(f"At time:{datetime.datetime.now()}, mySpider is running, sleep to wait.")
  39. i += 1
  40. time.sleep(600) # 每10分钟检查一次
  41. miniter += 10
  42. if miniter >= 1440: # 等待满24小时,自动退出监控脚本
  43. break
  44. if can == 'roll':
  45. while True:
  46. os.system("scrapy crawl roll_zuqiu")
  47. time.sleep(2)
  48. # os.system("scrapy crawl zq_jieshu")
  49. # time.sleep(2)
  50. os.system("scrapy crawl roll_lanqiu")
  51. time.sleep(2)
  52. # os.system("scrapy crawl lq_jieshu")
  53. # time.sleep(2)
  54. os.system("scrapy crawl roll_wangqiu")
  55. time.sleep(2)
  56. # os.system("scrapy crawl wq_jieshu")
  57. # time.sleep(2)
  58. os.system("scrapy crawl roll_bangqiu")
  59. # time.sleep(2)
  60. # os.system("scrapy crawl bq_jieshu")
  61. time.sleep(120)
  62. if can == 'notroll':
  63. while True:
  64. os.system("scrapy crawl zuqiu")
  65. time.sleep(4)
  66. os.system("scrapy crawl wqbodan")
  67. time.sleep(4)
  68. os.system("scrapy crawl wangqiu")
  69. time.sleep(4)
  70. os.system("scrapy crawl lanqiu")
  71. time.sleep(4)
  72. os.system("scrapy crawl liansai")
  73. time.sleep(4)
  74. os.system("scrapy crawl guanjun")
  75. time.sleep(4)
  76. os.system("scrapy crawl bangqiu")
  77. time.sleep(300)