# settings.py
  1. __package__ = 'archivebox.core'
  2. # TODO: add this after we upgrade to Django >=3.2
  3. # https://github.com/typeddjango/django-stubs
  4. # import django_stubs_ext
  5. # django_stubs_ext.monkeypatch()
  6. import os
  7. import sys
  8. import re
  9. import logging
  10. import tempfile
  11. from pathlib import Path
  12. from django.utils.crypto import get_random_string
  13. from ..config import (
  14. DEBUG,
  15. SECRET_KEY,
  16. ALLOWED_HOSTS,
  17. PACKAGE_DIR,
  18. TEMPLATES_DIR_NAME,
  19. CUSTOM_TEMPLATES_DIR,
  20. SQL_INDEX_FILENAME,
  21. OUTPUT_DIR,
  22. LOGS_DIR,
  23. TIMEZONE,
  24. LDAP,
  25. LDAP_SERVER_URI,
  26. LDAP_BIND_DN,
  27. LDAP_BIND_PASSWORD,
  28. LDAP_USER_BASE,
  29. LDAP_USER_FILTER,
  30. LDAP_USERNAME_ATTR,
  31. LDAP_FIRSTNAME_ATTR,
  32. LDAP_LASTNAME_ATTR,
  33. LDAP_EMAIL_ATTR,
  34. )
  35. IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
  36. IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
  37. IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
  38. ################################################################################
  39. ### Django Core Settings
  40. ################################################################################
  41. WSGI_APPLICATION = 'core.wsgi.application'
  42. ROOT_URLCONF = 'core.urls'
  43. LOGIN_URL = '/accounts/login/'
  44. LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
  45. PASSWORD_RESET_URL = '/accounts/password_reset/'
  46. APPEND_SLASH = True
  47. DEBUG = DEBUG or ('--debug' in sys.argv)
  48. INSTALLED_APPS = [
  49. 'django.contrib.auth',
  50. 'django.contrib.contenttypes',
  51. 'django.contrib.sessions',
  52. 'django.contrib.messages',
  53. 'django.contrib.staticfiles',
  54. 'django.contrib.admin',
  55. 'solo',
  56. 'core',
  57. 'api',
  58. # Plugins
  59. 'plugins.defaults',
  60. 'plugins.system',
  61. # 'plugins.replaywebpage', # provides UI to view WARC files
  62. # 'plugins.gallerydl', # provides gallerydl dependency + extractor
  63. # 'plugins.browsertrix', # provides browsertrix dependency + extractor
  64. # 'plugins.playwright', # provides playwright dependency
  65. # ...
  66. # someday we may have enough plugins to justify dynamic loading:
  67. # *(path.parent.name for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/apps.py')),,
  68. 'django_extensions',
  69. ]
  70. ################################################################################
  71. ### Staticfile and Template Settings
  72. ################################################################################
  73. STATIC_URL = '/static/'
  74. STATIC_ROOT = Path(PACKAGE_DIR) / 'collected_static'
  75. STATICFILES_DIRS = [
  76. *([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
  77. str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
  78. # Plugins
  79. # str(Path(PACKAGE_DIR) / 'plugins/defaults/static'),
  80. # str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/static'),
  81. # str(Path(PACKAGE_DIR) / 'plugins/gallerydl/static'),
  82. # str(Path(PACKAGE_DIR) / 'plugins/browsertrix/static'),
  83. # str(Path(PACKAGE_DIR) / 'plugins/playwright/static'),
  84. # ...
  85. # someday if there are many more plugins / user-addable plugins:
  86. # *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/static')),
  87. ]
  88. MEDIA_URL = '/archive/'
  89. MEDIA_ROOT = OUTPUT_DIR / 'archive'
  90. TEMPLATE_DIRS = [
  91. *([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
  92. str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
  93. str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
  94. str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
  95. # Plugins
  96. # added by plugins.<PluginName>.apps.<AppName>.ready -> .settings.register_plugin_settings
  97. # str(Path(PACKAGE_DIR) / 'plugins/defaults/templates'),
  98. # str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/templates'),
  99. # str(Path(PACKAGE_DIR) / 'plugins/gallerydl/templates'),
  100. # str(Path(PACKAGE_DIR) / 'plugins/browsertrix/templates'),
  101. # str(Path(PACKAGE_DIR) / 'plugins/playwright/templates'),
  102. # ...
  103. #
  104. # someday if there are many more plugins / user-addable plugins:
  105. # *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/templates')),
  106. ]
  107. TEMPLATES = [
  108. {
  109. 'BACKEND': 'django.template.backends.django.DjangoTemplates',
  110. 'DIRS': TEMPLATE_DIRS,
  111. 'APP_DIRS': True,
  112. 'OPTIONS': {
  113. 'context_processors': [
  114. 'django.template.context_processors.debug',
  115. 'django.template.context_processors.request',
  116. 'django.contrib.auth.context_processors.auth',
  117. 'django.contrib.messages.context_processors.messages',
  118. ],
  119. },
  120. },
  121. ]
  122. # For usage with https://www.jetadmin.io/integrations/django
  123. # INSTALLED_APPS += ['jet_django']
  124. # JET_PROJECT = 'archivebox'
  125. # JET_TOKEN = 'some-api-token-here'
  126. MIDDLEWARE = [
  127. 'core.middleware.TimezoneMiddleware',
  128. 'django.middleware.security.SecurityMiddleware',
  129. 'django.contrib.sessions.middleware.SessionMiddleware',
  130. 'django.middleware.common.CommonMiddleware',
  131. 'django.middleware.csrf.CsrfViewMiddleware',
  132. 'django.contrib.auth.middleware.AuthenticationMiddleware',
  133. 'core.middleware.ReverseProxyAuthMiddleware',
  134. 'django.contrib.messages.middleware.MessageMiddleware',
  135. 'core.middleware.CacheControlMiddleware',
  136. ]
  137. ################################################################################
  138. ### Authentication Settings
  139. ################################################################################
  140. AUTHENTICATION_BACKENDS = [
  141. 'django.contrib.auth.backends.RemoteUserBackend',
  142. 'django.contrib.auth.backends.ModelBackend',
  143. ]
  144. if LDAP:
  145. try:
  146. import ldap
  147. from django_auth_ldap.config import LDAPSearch
  148. global AUTH_LDAP_SERVER_URI
  149. global AUTH_LDAP_BIND_DN
  150. global AUTH_LDAP_BIND_PASSWORD
  151. global AUTH_LDAP_USER_SEARCH
  152. global AUTH_LDAP_USER_ATTR_MAP
  153. AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
  154. AUTH_LDAP_BIND_DN = LDAP_BIND_DN
  155. AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
  156. assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
  157. AUTH_LDAP_USER_SEARCH = LDAPSearch(
  158. LDAP_USER_BASE,
  159. ldap.SCOPE_SUBTREE,
  160. '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
  161. )
  162. AUTH_LDAP_USER_ATTR_MAP = {
  163. 'username': LDAP_USERNAME_ATTR,
  164. 'first_name': LDAP_FIRSTNAME_ATTR,
  165. 'last_name': LDAP_LASTNAME_ATTR,
  166. 'email': LDAP_EMAIL_ATTR,
  167. }
  168. AUTHENTICATION_BACKENDS = [
  169. 'django.contrib.auth.backends.ModelBackend',
  170. 'django_auth_ldap.backend.LDAPBackend',
  171. ]
  172. except ModuleNotFoundError:
  173. sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
  174. # dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap
  175. # sys.exit(1)
  176. ################################################################################
  177. ### Debug Settings
  178. ################################################################################
  179. # only enable debug toolbar when in DEBUG mode with --nothreading (it doesnt work in multithreaded mode)
  180. DEBUG_TOOLBAR = DEBUG and ('--nothreading' in sys.argv) and ('--reload' not in sys.argv)
  181. if DEBUG_TOOLBAR:
  182. try:
  183. import debug_toolbar # noqa
  184. DEBUG_TOOLBAR = True
  185. except ImportError:
  186. DEBUG_TOOLBAR = False
  187. if DEBUG_TOOLBAR:
  188. INSTALLED_APPS = [*INSTALLED_APPS, 'debug_toolbar']
  189. INTERNAL_IPS = ['0.0.0.0', '127.0.0.1', '*']
  190. DEBUG_TOOLBAR_CONFIG = {
  191. "SHOW_TOOLBAR_CALLBACK": lambda request: True,
  192. "RENDER_PANELS": True,
  193. }
  194. DEBUG_TOOLBAR_PANELS = [
  195. 'debug_toolbar.panels.history.HistoryPanel',
  196. 'debug_toolbar.panels.versions.VersionsPanel',
  197. 'debug_toolbar.panels.timer.TimerPanel',
  198. 'debug_toolbar.panels.settings.SettingsPanel',
  199. 'debug_toolbar.panels.headers.HeadersPanel',
  200. 'debug_toolbar.panels.request.RequestPanel',
  201. 'debug_toolbar.panels.sql.SQLPanel',
  202. 'debug_toolbar.panels.staticfiles.StaticFilesPanel',
  203. # 'debug_toolbar.panels.templates.TemplatesPanel', # buggy/slow
  204. 'debug_toolbar.panels.cache.CachePanel',
  205. 'debug_toolbar.panels.signals.SignalsPanel',
  206. 'debug_toolbar.panels.logging.LoggingPanel',
  207. 'debug_toolbar.panels.redirects.RedirectsPanel',
  208. 'debug_toolbar.panels.profiling.ProfilingPanel',
  209. 'djdt_flamegraph.FlamegraphPanel',
  210. ]
  211. MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
  212. ################################################################################
  213. ### External Service Settings
  214. ################################################################################
  215. DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
  216. DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
  217. DATABASES = {
  218. 'default': {
  219. 'ENGINE': 'django.db.backends.sqlite3',
  220. 'NAME': DATABASE_NAME,
  221. 'OPTIONS': {
  222. 'timeout': 60,
  223. 'check_same_thread': False,
  224. },
  225. 'TIME_ZONE': TIMEZONE,
  226. # DB setup is sometimes modified at runtime by setup_django() in config.py
  227. }
  228. }
  229. CACHE_BACKEND = 'django.core.cache.backends.locmem.LocMemCache'
  230. # CACHE_BACKEND = 'django.core.cache.backends.db.DatabaseCache'
  231. # CACHE_BACKEND = 'django.core.cache.backends.dummy.DummyCache'
  232. CACHES = {
  233. 'default': {
  234. 'BACKEND': CACHE_BACKEND,
  235. 'LOCATION': 'django_cache_default',
  236. }
  237. }
  238. EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
  239. ################################################################################
  240. ### Security Settings
  241. ################################################################################
  242. SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
  243. ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
  244. SECURE_BROWSER_XSS_FILTER = True
  245. SECURE_CONTENT_TYPE_NOSNIFF = True
  246. SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin'
  247. CSRF_COOKIE_SECURE = False
  248. SESSION_COOKIE_SECURE = False
  249. SESSION_COOKIE_DOMAIN = None
  250. SESSION_COOKIE_AGE = 1209600 # 2 weeks
  251. SESSION_EXPIRE_AT_BROWSER_CLOSE = False
  252. SESSION_SAVE_EVERY_REQUEST = True
  253. SESSION_ENGINE = "django.contrib.sessions.backends.db"
  254. AUTH_PASSWORD_VALIDATORS = [
  255. {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
  256. {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
  257. {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
  258. {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
  259. ]
  260. ################################################################################
  261. ### Shell Settings
  262. ################################################################################
  263. SHELL_PLUS = 'ipython'
  264. SHELL_PLUS_PRINT_SQL = False
  265. IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
  266. IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
  267. if IS_SHELL:
  268. os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py')
  269. ################################################################################
  270. ### Internationalization & Localization Settings
  271. ################################################################################
  272. LANGUAGE_CODE = 'en-us'
  273. USE_I18N = True
  274. USE_TZ = True
  275. DATETIME_FORMAT = 'Y-m-d g:iA'
  276. SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
  277. TIME_ZONE = TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
  278. from django.conf.locale.en import formats as en_formats
  279. en_formats.DATETIME_FORMAT = DATETIME_FORMAT
  280. en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
  281. ################################################################################
  282. ### Logging Settings
  283. ################################################################################
  284. IGNORABLE_404_URLS = [
  285. re.compile(r'apple-touch-icon.*\.png$'),
  286. re.compile(r'favicon\.ico$'),
  287. re.compile(r'robots\.txt$'),
  288. re.compile(r'.*\.(css|js)\.map$'),
  289. ]
  290. class NoisyRequestsFilter(logging.Filter):
  291. def filter(self, record) -> bool:
  292. logline = record.getMessage()
  293. # ignore harmless 404s for the patterns in IGNORABLE_404_URLS
  294. for ignorable_url_pattern in IGNORABLE_404_URLS:
  295. ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M)
  296. if ignorable_log_pattern.match(logline):
  297. return False
  298. # ignore staticfile requests that 200 or 30*
  299. ignoreable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M)
  300. if ignoreable_200_log_pattern.match(logline):
  301. return False
  302. return True
  303. if LOGS_DIR.exists():
  304. ERROR_LOG = (LOGS_DIR / 'errors.log')
  305. else:
  306. # historically too many edge cases here around creating log dir w/ correct permissions early on
  307. # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
  308. ERROR_LOG = tempfile.NamedTemporaryFile().name
  309. LOGGING = {
  310. 'version': 1,
  311. 'disable_existing_loggers': False,
  312. 'handlers': {
  313. 'console': {
  314. 'class': 'logging.StreamHandler',
  315. },
  316. 'logfile': {
  317. 'level': 'ERROR',
  318. 'class': 'logging.handlers.RotatingFileHandler',
  319. 'filename': ERROR_LOG,
  320. 'maxBytes': 1024 * 1024 * 25, # 25 MB
  321. 'backupCount': 10,
  322. },
  323. },
  324. 'filters': {
  325. 'noisyrequestsfilter': {
  326. '()': NoisyRequestsFilter,
  327. }
  328. },
  329. 'loggers': {
  330. 'django': {
  331. 'handlers': ['console', 'logfile'],
  332. 'level': 'INFO',
  333. 'filters': ['noisyrequestsfilter'],
  334. },
  335. 'django.server': {
  336. 'handlers': ['console', 'logfile'],
  337. 'level': 'INFO',
  338. 'filters': ['noisyrequestsfilter'],
  339. }
  340. },
  341. }