@@ -1,120 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Scrapy settings for scrapy_yzd project
-#
-# For simplicity, this file contains only settings considered important or
-# commonly used. You can find more settings consulting the documentation:
-#
-# http://doc.scrapy.org/en/latest/topics/settings.html
-# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-
-BOT_NAME = 'scrapy_yzd'
-
-SPIDER_MODULES = ['scrapy_yzd.spiders']
-NEWSPIDER_MODULE = 'scrapy_yzd.spiders'
-
-LOG_LEVEL = 'INFO'
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
-
-# Obey robots.txt rules
-ROBOTSTXT_OBEY = False
-
-# Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
-
-# Configure a delay for requests for the same website (default: 0)
-# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
-# See also autothrottle settings and docs
-DOWNLOAD_DELAY = 2
-# The download delay setting will honor only one of:
-#CONCURRENT_REQUESTS_PER_DOMAIN = 16
-#CONCURRENT_REQUESTS_PER_IP = 16
-
-# Disable cookies (enabled by default)
-COOKIES_ENABLED = False
-
-# Disable Telnet Console (enabled by default)
-#TELNETCONSOLE_ENABLED = False
-
-# Override the default request headers:
-#DEFAULT_REQUEST_HEADERS = {
-# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-# 'Accept-Language': 'en',
-#}
-
-# Enable or disable spider middlewares
-# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
-# from scrapy_deltafetch import DeltaFetch
-# SPIDER_MIDDLEWARES = {
-# 'scrapy_deltafetch.DeltaFetch': 1
-# }
-# DELTAFETCH_ENABLED = True
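Re-enabling the commented block above turns on scrapy_deltafetch, a spider middleware that stores a fingerprint for every request whose response yielded items and skips those requests on later runs, so repeat crawls only fetch pages not seen before. A minimal enabled form might look like the sketch below; the DELTAFETCH_DIR path is only illustrative.

SPIDER_MIDDLEWARES = {
    'scrapy_deltafetch.DeltaFetch': 1,
}
DELTAFETCH_ENABLED = True
DELTAFETCH_DIR = 'deltafetch'   # where the fingerprint database is kept (illustrative path)
# DELTAFETCH_RESET = True       # uncomment to drop stored fingerprints and refetch everything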
-
-# Enable or disable downloader middlewares
-# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
-DOWNLOADER_MIDDLEWARES = {
- # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None, # disable the built-in user-agent middleware
- # 'scrapy_yzd.middlewares.RotateUserAgentMiddleware': 400, # rotate the user agent
- # 'scrapy_yzd.autoproxy.AutoProxyMiddleware': 543 # proxy pool
-}
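The commented entries above reference scrapy_yzd.middlewares.RotateUserAgentMiddleware, whose source is not part of this file. The sketch below is a hypothetical minimal version of such a user-agent rotating downloader middleware (the USER_AGENTS list and class body are assumptions, not the project's actual code); as in the commented entry, the built-in UserAgentMiddleware would be set to None so the two do not overwrite each other's header.

import random

USER_AGENTS = [  # assumed list of user-agent strings to rotate through
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
]

class RotateUserAgentMiddleware(object):
    """Pick a random User-Agent for every outgoing request."""

    def process_request(self, request, spider):
        request.headers['User-Agent'] = random.choice(USER_AGENTS)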
-
-AUTO_PROXY = { # proxy pool middleware settings; see the middleware's GitHub page for details
- #'test_urls':[('http://www.jd.com', '4006561155')],
- 'ban_code': [500, 502, 503, 400, 504],
- 'init_valid_proxys': 2,
-}
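The AUTO_PROXY keys are consumed by the AutoProxyMiddleware referenced above, which is not included in this file. Judging by the names, 'ban_code' appears to list response status codes that mark the current proxy as banned, and 'init_valid_proxys' the number of working proxies to validate before the crawl starts; both readings are assumptions, and the middleware's GitHub page remains the authoritative reference.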
-
-# Enable or disable extensions
-# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
-#EXTENSIONS = {
-# 'scrapy.extensions.telnet.TelnetConsole': None,
-#}
-
-# Configure item pipelines
-# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
- # Lanqiupipeline
- # 'scrapy_yzd.pipelines.Lanqiupipeline':1,
- # 'scrapy_yzd.pipelines.Wangqiupipeline':1,
- # 'scrapy_yzd.pipelines.BallStatuspipeline':1,
- # 'scrapy_yzd.pipelines.SomePipeline': 300,
- # 'scrapy_yzd.pipelines.ScrapyYzdPipeline':100,
- # 'scrapy_yzd.pipelines.Temaballpipeline':1
- # 'scrapy_yzd.pipelines.MysqlTwistedPipline':1
- # 'scrapy_yzd.pipelines.Banqiupipeline':1
-}
-
-# Enable and configure the AutoThrottle extension (disabled by default)
-# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
-#AUTOTHROTTLE_ENABLED = True
-# The initial download delay
-#AUTOTHROTTLE_START_DELAY = 5
-# The maximum download delay to be set in case of high latencies
-#AUTOTHROTTLE_MAX_DELAY = 60
-# The average number of requests Scrapy should be sending in parallel to
-# each remote server
-#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-# Enable showing throttling stats for every response received:
-#AUTOTHROTTLE_DEBUG = False
-
-# Enable and configure HTTP caching (disabled by default)
-# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-#HTTPCACHE_ENABLED = True
-#HTTPCACHE_EXPIRATION_SECS = 0
-#HTTPCACHE_DIR = 'httpcache'
-#HTTPCACHE_IGNORE_HTTP_CODES = []
-#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
-
-
-# database = PostgresqlDatabase('kaiyou',**{'host': '192.168.2.200', 'port': 10432, 'user': 'kaiyou', 'password': '123456'})
-
-# database = PostgresqlDatabase('postgres', **{'host': '127.0.0.1', 'port': 5432, 'user': 'postgres', 'password': '9998877'})
-
-
-POST_HOST = '192.168.2.200'
-POST_DATABASE = 'kaiyou'
-POST_USER = 'kaiyou'
-POST_PORT = '10432'
-POST_PASSWORD = '123456'
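The POST_* values above look like connection parameters for the PostgreSQL instance that the commented-out PostgresqlDatabase lines also point at. Below is a hypothetical sketch of how an item pipeline could consume them through crawler.settings using psycopg2; the class, table, and column names are illustrative and not taken from scrapy_yzd.pipelines. It would be activated by adding it to ITEM_PIPELINES.

import psycopg2

class PostgresPipeline(object):
    """Illustrative pipeline that writes scraped items to PostgreSQL."""

    @classmethod
    def from_crawler(cls, crawler):
        # Pull the connection parameters from the POST_* settings defined above.
        s = crawler.settings
        return cls(host=s.get('POST_HOST'), port=s.get('POST_PORT'),
                   dbname=s.get('POST_DATABASE'), user=s.get('POST_USER'),
                   password=s.get('POST_PASSWORD'))

    def __init__(self, host, port, dbname, user, password):
        self.conn_kwargs = dict(host=host, port=port, dbname=dbname,
                                user=user, password=password)

    def open_spider(self, spider):
        self.conn = psycopg2.connect(**self.conn_kwargs)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        # Real table and column names would depend on the item definitions in items.py.
        self.cursor.execute('INSERT INTO items (data) VALUES (%s)', [str(dict(item))])
        self.conn.commit()
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()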