@@ -1,120 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Scrapy settings for scrapy_yzd project
-#
-# For simplicity, this file contains only settings considered important or
-# commonly used. You can find more settings consulting the documentation:
-#
-# http://doc.scrapy.org/en/latest/topics/settings.html
-# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-
-BOT_NAME = 'scrapy_yzd'
-
-SPIDER_MODULES = ['scrapy_yzd.spiders']
-NEWSPIDER_MODULE = 'scrapy_yzd.spiders'
-
-LOG_LEVEL = 'INFO'
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
-
-# Obey robots.txt rules
-ROBOTSTXT_OBEY = False
-
-# Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
-
-# Configure a delay for requests for the same website (default: 0)
-# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
-# See also autothrottle settings and docs
-DOWNLOAD_DELAY = 2
-# The download delay setting will honor only one of:
-#CONCURRENT_REQUESTS_PER_DOMAIN = 16
-#CONCURRENT_REQUESTS_PER_IP = 16
-
-# Disable cookies (enabled by default)
-COOKIES_ENABLED = False
-
-# Disable Telnet Console (enabled by default)
-#TELNETCONSOLE_ENABLED = False
-
-# Override the default request headers:
-#DEFAULT_REQUEST_HEADERS = {
-# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-# 'Accept-Language': 'en',
-#}
-
-# Enable or disable spider middlewares
-# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-# from scrapy_deltafetch import DeltaFetch
-# SPIDER_MIDDLEWARES = {
-# 'scrapy_deltafetch.DeltaFetch': 1
-# }
-# DELTAFETCH_ENABLED = True
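Re-enabling the commented block above turns on scrapy_deltafetch, a spider middleware that stores a fingerprint for every request whose response yielded items and skips those requests on later runs, so repeat crawls only fetch pages not seen before. A minimal enabled form might look like the sketch below; the DELTAFETCH_DIR path is only illustrative.

SPIDER_MIDDLEWARES = {
    'scrapy_deltafetch.DeltaFetch': 1,
}
DELTAFETCH_ENABLED = True
DELTAFETCH_DIR = 'deltafetch'   # where the fingerprint database is kept (illustrative path)
# DELTAFETCH_RESET = True       # uncomment to drop stored fingerprints and refetch everything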
-
-# Enable or disable downloader middlewares
-# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-DOWNLOADER_MIDDLEWARES = {
- # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None, # disable the built-in user-agent middleware
- # 'scrapy_yzd.middlewares.RotateUserAgentMiddleware': 400, # rotate the user agent
- # 'scrapy_yzd.autoproxy.AutoProxyMiddleware': 543 # proxy pool
-}
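The commented entries above reference scrapy_yzd.middlewares.RotateUserAgentMiddleware, whose source is not part of this file. The sketch below is a hypothetical minimal version of such a user-agent rotating downloader middleware (the USER_AGENTS list and class body are assumptions, not the project's actual code); as in the commented entry, the built-in UserAgentMiddleware would be set to None so the two do not overwrite each other's header.

import random

USER_AGENTS = [  # assumed list of user-agent strings to rotate through
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
]

class RotateUserAgentMiddleware(object):
    """Pick a random User-Agent for every outgoing request."""

    def process_request(self, request, spider):
        request.headers['User-Agent'] = random.choice(USER_AGENTS)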
-
-AUTO_PROXY = { # proxy pool middleware settings; see the middleware's GitHub page for details
- #'test_urls':[('http://www.jd.com', '4006561155')],
- 'ban_code': [500, 502, 503, 400, 504],
- 'init_valid_proxys': 2,
-}
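The AUTO_PROXY keys are consumed by the AutoProxyMiddleware referenced above, which is not included in this file. Judging by the names, 'ban_code' appears to list response status codes that mark the current proxy as banned, and 'init_valid_proxys' the number of working proxies to validate before the crawl starts; both readings are assumptions, and the middleware's GitHub page remains the authoritative reference.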
-
-# Enable or disable extensions
-# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
-#EXTENSIONS = {
-# 'scrapy.extensions.telnet.TelnetConsole': None,
-#}
-
-# Configure item pipelines
-# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
- # Lanqiupipeline
- # 'scrapy_yzd.pipelines.Lanqiupipeline':1,
- # 'scrapy_yzd.pipelines.Wangqiupipeline':1,
- # 'scrapy_yzd.pipelines.BallStatuspipeline':1,
- # 'scrapy_yzd.pipelines.SomePipeline': 300,
- # 'scrapy_yzd.pipelines.ScrapyYzdPipeline':100,
- # 'scrapy_yzd.pipelines.Temaballpipeline':1
- # 'scrapy_yzd.pipelines.MysqlTwistedPipline':1
- # 'scrapy_yzd.pipelines.Banqiupipeline':1
-}
-
-# Enable and configure the AutoThrottle extension (disabled by default)
-# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
-#AUTOTHROTTLE_ENABLED = True
-# The initial download delay
-#AUTOTHROTTLE_START_DELAY = 5
-# The maximum download delay to be set in case of high latencies
-#AUTOTHROTTLE_MAX_DELAY = 60
-# The average number of requests Scrapy should be sending in parallel to
-# each remote server
-#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-# Enable showing throttling stats for every response received:
-#AUTOTHROTTLE_DEBUG = False
-
-# Enable and configure HTTP caching (disabled by default)
-# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-#HTTPCACHE_ENABLED = True
-#HTTPCACHE_EXPIRATION_SECS = 0
-#HTTPCACHE_DIR = 'httpcache'
-#HTTPCACHE_IGNORE_HTTP_CODES = []
-#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
-
-
-# database = PostgresqlDatabase('kaiyou',**{'host': '192.168.2.200', 'port': 10432, 'user': 'kaiyou', 'password': '123456'})
-
-# database = PostgresqlDatabase('postgres', **{'host': '127.0.0.1', 'port': 5432, 'user': 'postgres', 'password': '9998877'})
-
-
-POST_HOST = '192.168.2.200'
-POST_DATABASE = 'kaiyou'
-POST_USER = 'kaiyou'
-POST_PORT = '10432'
-POST_PASSWORD = '123456'
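The POST_* values above look like connection parameters for the PostgreSQL instance that the commented-out PostgresqlDatabase lines also point at. Below is a hypothetical sketch of how an item pipeline could consume them through crawler.settings using psycopg2; the class, table, and column names are illustrative and not taken from scrapy_yzd.pipelines. It would be activated by adding it to ITEM_PIPELINES.

import psycopg2

class PostgresPipeline(object):
    """Illustrative pipeline that writes scraped items to PostgreSQL."""

    @classmethod
    def from_crawler(cls, crawler):
        # Pull the connection parameters from the POST_* settings defined above.
        s = crawler.settings
        return cls(host=s.get('POST_HOST'), port=s.get('POST_PORT'),
                   dbname=s.get('POST_DATABASE'), user=s.get('POST_USER'),
                   password=s.get('POST_PASSWORD'))

    def __init__(self, host, port, dbname, user, password):
        self.conn_kwargs = dict(host=host, port=port, dbname=dbname,
                                user=user, password=password)

    def open_spider(self, spider):
        self.conn = psycopg2.connect(**self.conn_kwargs)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        # Real table and column names would depend on the item definitions in items.py.
        self.cursor.execute('INSERT INTO items (data) VALUES (%s)', [str(dict(item))])
        self.conn.commit()
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()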