settings.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. # -*- coding: utf-8 -*-
  2. # Scrapy settings for hg3535 project
  3. #
  4. # For simplicity, this file contains only settings considered important or
  5. # commonly used. You can find more settings consulting the documentation:
  6. #
  7. # https://doc.scrapy.org/en/latest/topics/settings.html
  8. # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
  9. # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
  10. BOT_NAME = 'hg3535'
  11. SPIDER_MODULES = ['hg3535.spiders']
  12. NEWSPIDER_MODULE = 'hg3535.spiders'
  13. # Crawl responsibly by identifying yourself (and your website) on the user-agent
  14. USER_AGENT = {
  15. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:16.0) Gecko/20120813 Firefox/16.0'
  16. }
  17. # Obey robots.txt rules
  18. ROBOTSTXT_OBEY = False
  19. # Configure maximum concurrent requests performed by Scrapy (default: 16)
  20. CONCURRENT_REQUESTS = 100
  21. # Configure a delay for requests for the same website (default: 0)
  22. # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
  23. # See also autothrottle settings and docs
  24. #DOWNLOAD_DELAY = 3
  25. DOWNLOAD_DELAY = 0
  26. # The download delay setting will honor only one of:
  27. CONCURRENT_REQUESTS_PER_DOMAIN = 100
  28. CONCURRENT_REQUESTS_PER_IP = 100
  29. # Disable cookies (enabled by default)
  30. #COOKIES_ENABLED = False
  31. # Disable Telnet Console (enabled by default)
  32. #TELNETCONSOLE_ENABLED = False
  33. # Override the default request headers:
  34. #DEFAULT_REQUEST_HEADERS = {
  35. # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  36. # 'Accept-Language': 'en',
  37. #}
  38. # Enable or disable spider middlewares
  39. # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
  40. # from scrapy_deltafetch
  41. # SPIDER_MIDDLEWARES = {
  42. # # 'scrapy_deltafetch.DeltaFetch': 100,
  43. # 'hg3535.middlewares.Hg3535SpiderMiddleware': 543
  44. # }
  45. # #
  46. # DELTAFETCH_ENABLED = True
  47. # Enable or disable downloader middlewares
  48. # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
  49. DOWNLOADER_MIDDLEWARES = {
  50. # 'hg3535.middlewares.Hg3535DownloaderMiddleware': 200,
  51. 'hg3535.middlewares.Hg3535timeoutDownloaderMiddleware': 200,
  52. 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware': 500,
  53. # 'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': 502,
  54. }
  55. # Enable or disable extensions
  56. # See https://doc.scrapy.org/en/latest/topics/extensions.html
  57. #EXTENSIONS = {
  58. # 'scrapy.extensions.telnet.TelnetConsole': None,
  59. #}
  60. # Configure item pipelines
  61. # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
  62. ITEM_PIPELINES = {
  63. # 篮球
  64. 'hg3535.pipelines.Lanqiupipeline': 300,
  65. # 冠军
  66. 'hg3535.pipelines.Guanjunpipeline': 300,
  67. # 联赛
  68. 'hg3535.pipelines.Liansaipipeline': 300,
  69. # 足球
  70. 'hg3535.pipelines.Zuqiupipeline': 300,
  71. # 网球
  72. "hg3535.pipelines.Wangqiupipeline": 300,
  73. # 网球波胆
  74. 'hg3535.pipelines.Wqbodanpipeline': 300,
  75. # 棒球让球&大小盘
  76. "hg3535.pipelines.Bangqiupipeline": 300,
  77. }
  78. # Enable and configure the AutoThrottle extension (disabled by default)
  79. # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
  80. #AUTOTHROTTLE_ENABLED = True
  81. # The initial download delay
  82. #AUTOTHROTTLE_START_DELAY = 5
  83. # The maximum download delay to be set in case of high latencies
  84. #AUTOTHROTTLE_MAX_DELAY = 60
  85. # The average number of requests Scrapy should be sending in parallel to
  86. # each remote server
  87. #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
  88. # Enable showing throttling stats for every response received:
  89. #AUTOTHROTTLE_DEBUG = False
  90. # Enable and configure HTTP caching (disabled by default)
  91. # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
  92. #HTTPCACHE_ENABLED = True
  93. #HTTPCACHE_EXPIRATION_SECS = 0
  94. #HTTPCACHE_DIR = 'httpcache'
  95. #HTTPCACHE_IGNORE_HTTP_CODES = []
  96. #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
  97. # DOWNLOAD_TIMEOUT = 180
  98. REACTOR_THREADPOOL_MAXSIZE = 40
  99. # LOG_LEVEL = 'INFO'
  100. COOKIES_ENABLED = False
  101. RETRY_ENABLED = False
  102. DOWNLOAD_TIMEOUT = 2
  103. REDIRECT_ENABLED = False
  104. # RETRY_ENABLED = True
  105. # RETRY_TIMES = 2
  106. # RETRY_HTTP_CODES = [500, 502, 503, 504, 400, 408]
  107. # POST_HOST = 'localhost'
  108. # POST_DATABASE = 'postgres'
  109. # POST_USER = 'postgres'
  110. # POST_PORT = '5433'
  111. # POST_PASSWORD = '123456'
  112. POST_HOST = '172.17.0.4'
  113. POST_DATABASE = 'kaiyou'
  114. POST_USER = 'kaiyou'
  115. POST_PORT = '5432'
  116. POST_PASSWORD = 'yjkj8888'
  117. R_HOST = 'localhost'
  118. R_POST = '6379'
  119. #R_PASSWORD = '123456'
  120. # POST_HOST = '192.168.2.200'
  121. # POST_DATABASE = 'kaiyou'
  122. # POST_USER = 'kaiyou'
  123. # POST_PORT = '10432'
  124. # POST_PASSWORD = '123456'