| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- import time
- import os
- import sys
# Run mode selected on the command line: "roll" or "notroll".
# sys.argv[0] is the script path itself, so the mode is taken from the first
# real argument. The script path is kept only as a fallback when no argument
# is supplied, which preserves the previous value in that case.
can = sys.argv[1] if len(sys.argv) > 1 else sys.argv[0]
# NOTE(review): a second argument (`tag = sys.argv[2]`) was sketched here
# earlier but nothing in this script reads it.
# File: timerStartDaily.py
- from scrapy import cmdline
- import datetime
- import time
- import shutil
- import os
# Directory created so that the crawl can be resumed; it stores the data
# needed to continue an interrupted crawl (used as JOBDIR below).
recoderDir = r"crawls"
# Marker file: if it exists, the spider is considered to be running.
checkFile = "isRunning.txt"
startTime = datetime.datetime.now()
print(f"startTime = {startTime}")
i = 0        # number of "spider still running" polls seen so far
miniter = 0  # minutes spent waiting so far

# Poll the marker file until the spider is idle, then wipe the resume
# directory and launch the spider once. Give up after 24 hours of waiting.
while True:
    isRunning = os.path.isfile(checkFile)

    if isRunning:
        # Spider still busy: wait and check again.
        print(f"At time:{datetime.datetime.now()}, mySpider is running, sleep to wait.")
        i += 1
        time.sleep(600)  # check once every 10 minutes
        miniter += 10
        if miniter >= 1440:  # waited a full 24 hours: stop monitoring
            break
        continue

    # Spider is not running: clean up the JOBDIR before starting it.
    isExsit = os.path.isdir(recoderDir)  # does the resume directory exist?
    print(f"mySpider not running, ready to start. isExsit:{isExsit}")
    if not isExsit:
        print(f"At time:{datetime.datetime.now()}, Dir:{recoderDir} is not exsit.")
    else:
        # Remove the resume directory and everything beneath it.
        removeRes = shutil.rmtree(recoderDir)
        print(f"At time:{datetime.datetime.now()}, delete res:{removeRes}")

    time.sleep(20)
    clawerTime = datetime.datetime.now()
    waitTime = clawerTime - startTime
    print(f"At time:{clawerTime}, start clawer: mySpider !!!, waitTime:{waitTime}")
    cmdline.execute(
        ["scrapy", "crawl", "mySpider", "-s", "JOBDIR=crawls/storeMyRequest"]
    )
    # Leave the loop once the spider has finished.
    # NOTE(review): scrapy's cmdline.execute is understood to exit the process
    # itself when the crawl ends — confirm whether this line is ever reached.
    break
if can == 'roll':
    # Each entry is (shell command, seconds to pause after it returns).
    # The zq_jieshu / lq_jieshu / wq_jieshu / bq_jieshu spiders are currently
    # disabled and therefore not part of the schedule.
    roll_schedule = (
        ("scrapy crawl roll_zuqiu", 2),
        ("scrapy crawl roll_lanqiu", 2),
        ("scrapy crawl roll_wangqiu", 2),
        ("scrapy crawl roll_bangqiu", 120),
    )
    # Run the spiders one after another, forever; the long pause after the
    # last entry separates one full round from the next.
    while True:
        for command, pause in roll_schedule:
            os.system(command)
            time.sleep(pause)
if can == 'notroll':
    # Each entry is (shell command, seconds to pause after it returns).
    notroll_schedule = (
        ("scrapy crawl zuqiu", 4),
        ("scrapy crawl wqbodan", 4),
        ("scrapy crawl wangqiu", 4),
        ("scrapy crawl lanqiu", 4),
        ("scrapy crawl liansai", 4),
        ("scrapy crawl guanjun", 4),
        ("scrapy crawl bangqiu", 300),
    )
    # Run the spiders one after another, forever; the long pause after the
    # last entry separates one full round from the next.
    while True:
        for command, pause in notroll_schedule:
            os.system(command)
            time.sleep(pause)
|