"""Watchdog and launcher script for Scrapy spiders (labelled timerStartDaily.py).

Phase 1 polls for the marker file ``isRunning.txt``. When the marker is
absent, the resume directory ``crawls`` is removed (if it exists) and the
``mySpider`` crawl is launched in-process. While the marker is present the
script sleeps in 10-minute steps and gives up after 24 hours of waiting.

Phase 2 runs after the phase 1 loop ends. Depending on the first
command-line argument (``roll`` or ``notroll``) it repeatedly runs a fixed
list of spiders through ``scrapy crawl``; with any other value it does
nothing.
"""
import datetime
import os
import shutil
import sys
import time

from scrapy import cmdline


def _crawl_forever(spiders, gap_seconds, cycle_seconds):
    """Run each spider in order through ``scrapy crawl``, forever.

    Args:
        spiders: Spider names, run in the given order on every cycle.
        gap_seconds: Pause after every spider except the last one.
        cycle_seconds: Pause after the last spider, before the next cycle.

    This function never returns; the process has to be stopped externally.
    """
    final_index = len(spiders) - 1
    while True:
        for index, spider in enumerate(spiders):
            os.system(f"scrapy crawl {spider}")
            time.sleep(cycle_seconds if index == final_index else gap_seconds)


# Spider schedules keyed by run mode: (spider names, gap, end-of-cycle pause).
# The *_jieshu spiders (zq/lq/wq/bq) were commented out in the original "roll"
# loop and remain disabled here.
_SCHEDULES = {
    "roll": (
        ("roll_zuqiu", "roll_lanqiu", "roll_wangqiu", "roll_bangqiu"),
        2,
        120,
    ),
    "notroll": (
        (
            "zuqiu",
            "wqbodan",
            "wangqiu",
            "lanqiu",
            "liansai",
            "guanjun",
            "bangqiu",
        ),
        4,
        300,
    ),
}

# Run mode for phase 2. The original read sys.argv[0], which is the script's
# own path and so never equals "roll" or "notroll" in a normal invocation.
# The first real argument is used instead; with no argument the old value is
# kept, so neither loop runs, exactly as before.
can = sys.argv[1] if len(sys.argv) > 1 else sys.argv[0]

recoderDir = r"crawls"  # directory holding the data needed to resume a crawl
checkFile = "isRunning.txt"  # this file existing means the spider is running

startTime = datetime.datetime.now()
print(f"startTime = {startTime}")

i = 0  # number of completed polls; not read anywhere in the visible code
miniter = 0  # minutes spent waiting so far
while True:
    isRunning = os.path.isfile(checkFile)
    if not isRunning:
        # The spider is not running: clear the resume directory, then start it.
        isExsit = os.path.isdir(recoderDir)
        print(f"mySpider not running, ready to start. isExsit:{isExsit}")
        if isExsit:
            # shutil.rmtree returns None, so "delete res" always prints None.
            removeRes = shutil.rmtree(recoderDir)
            print(f"At time:{datetime.datetime.now()}, delete res:{removeRes}")
        else:
            print(f"At time:{datetime.datetime.now()}, Dir:{recoderDir} is not exsit.")
        # Short pause before launching; the original gives no reason for it.
        time.sleep(20)
        clawerTime = datetime.datetime.now()
        waitTime = clawerTime - startTime
        print(f"At time:{clawerTime}, start clawer: mySpider !!!, waitTime:{waitTime}")
        # NOTE(review): scrapy's cmdline.execute is expected to end the process
        # via sys.exit once the crawl finishes, which would make the `break`
        # and everything after this loop unreachable on this path - confirm.
        cmdline.execute('scrapy crawl mySpider -s JOBDIR=crawls/storeMyRequest'.split())
        break  # leave the monitor loop once the crawl has finished
    else:
        print(f"At time:{datetime.datetime.now()}, mySpider is running, sleep to wait.")
    i += 1
    time.sleep(600)  # poll once every 10 minutes
    miniter += 10
    if miniter >= 1440:  # waited a full 24 hours: stop monitoring
        break

# Phase 2: loop over the spiders for the selected mode, if one was selected.
if can in _SCHEDULES:
    _crawl_forever(*_SCHEDULES[can])