settings_logging.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. __package__ = 'archivebox.core'
  2. import re
  3. import os
  4. import tempfile
  5. import logging
  6. import pydantic
  7. import django.template
  8. from archivebox.config import CONSTANTS
  9. IGNORABLE_URL_PATTERNS = [
  10. re.compile(r"/.*/?apple-touch-icon.*\.png"),
  11. re.compile(r"/.*/?favicon\.ico"),
  12. re.compile(r"/.*/?robots\.txt"),
  13. re.compile(r"/.*/?.*\.(css|js)\.map"),
  14. re.compile(r"/.*/?.*\.(css|js)\.map"),
  15. re.compile(r"/static/.*"),
  16. re.compile(r"/admin/jsi18n/"),
  17. ]
  18. class NoisyRequestsFilter(logging.Filter):
  19. def filter(self, record) -> bool:
  20. logline = record.getMessage()
  21. # '"GET /api/v1/docs HTTP/1.1" 200 1023'
  22. # '"GET /static/admin/js/SelectFilter2.js HTTP/1.1" 200 15502'
  23. # '"GET /static/admin/js/SelectBox.js HTTP/1.1" 304 0'
  24. # '"GET /admin/jsi18n/ HTTP/1.1" 200 3352'
  25. # '"GET /admin/api/apitoken/0191bbf8-fd5e-0b8c-83a8-0f32f048a0af/change/ HTTP/1.1" 200 28778'
  26. # ignore harmless 404s for the patterns in IGNORABLE_URL_PATTERNS
  27. for pattern in IGNORABLE_URL_PATTERNS:
  28. ignorable_GET_request = re.compile(f'"GET {pattern.pattern} HTTP/.*" (2..|30.|404) .+$', re.I | re.M)
  29. if ignorable_GET_request.match(logline):
  30. return False
  31. ignorable_404_pattern = re.compile(f'Not Found: {pattern.pattern}', re.I | re.M)
  32. if ignorable_404_pattern.match(logline):
  33. return False
  34. return True
  35. class CustomOutboundWebhookLogFormatter(logging.Formatter):
  36. def format(self, record):
  37. result = super().format(record)
  38. return result.replace('HTTP Request: ', 'OutboundWebhook: ')
  39. ERROR_LOG = tempfile.NamedTemporaryFile().name
  40. LOGS_DIR = CONSTANTS.LOGS_DIR
  41. if os.access(LOGS_DIR, os.W_OK) and LOGS_DIR.is_dir():
  42. ERROR_LOG = (LOGS_DIR / 'errors.log')
  43. else:
  44. # historically too many edge cases here around creating log dir w/ correct permissions early on
  45. # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
  46. # print(f'[!] WARNING: data/logs dir does not exist. Logging to temp file: {ERROR_LOG}')
  47. pass
  48. LOG_LEVEL_DATABASE = 'WARNING' # change to DEBUG to log all SQL queries
  49. LOG_LEVEL_REQUEST = 'WARNING' # if DEBUG else 'WARNING'
  50. if LOG_LEVEL_DATABASE == 'DEBUG':
  51. db_logger = logging.getLogger('django.db.backends')
  52. db_logger.setLevel(logging.DEBUG)
  53. db_logger.addHandler(logging.StreamHandler())
  54. SETTINGS_LOGGING = {
  55. "version": 1,
  56. "disable_existing_loggers": False,
  57. "formatters": {
  58. "rich": {
  59. "datefmt": "[%Y-%m-%d %H:%M:%S]",
  60. "format": "%(name)s %(message)s",
  61. },
  62. "outbound_webhooks": {
  63. "()": CustomOutboundWebhookLogFormatter,
  64. "datefmt": "[%Y-%m-%d %H:%M:%S]",
  65. },
  66. },
  67. "filters": {
  68. "noisyrequestsfilter": {
  69. "()": NoisyRequestsFilter,
  70. },
  71. "require_debug_false": {
  72. "()": "django.utils.log.RequireDebugFalse",
  73. },
  74. "require_debug_true": {
  75. "()": "django.utils.log.RequireDebugTrue",
  76. },
  77. },
  78. "handlers": {
  79. "default": {
  80. "class": "rich.logging.RichHandler",
  81. "formatter": "rich",
  82. "level": "DEBUG",
  83. "markup": False,
  84. "rich_tracebacks": False, # Use standard Python tracebacks (no frame/box)
  85. "filters": ["noisyrequestsfilter"],
  86. },
  87. "logfile": {
  88. "level": "INFO",
  89. "class": "logging.handlers.RotatingFileHandler",
  90. "filename": ERROR_LOG,
  91. "maxBytes": 1024 * 1024 * 25, # 25 MB
  92. "backupCount": 10,
  93. "formatter": "rich",
  94. "filters": ["noisyrequestsfilter"],
  95. },
  96. "outbound_webhooks": {
  97. "class": "rich.logging.RichHandler",
  98. "markup": False,
  99. "rich_tracebacks": False, # Use standard Python tracebacks (no frame/box)
  100. "formatter": "outbound_webhooks",
  101. },
  102. # "mail_admins": {
  103. # "level": "ERROR",
  104. # "filters": ["require_debug_false"],
  105. # "class": "django.utils.log.AdminEmailHandler",
  106. # },
  107. "null": {
  108. "class": "logging.NullHandler",
  109. },
  110. },
  111. "root": {
  112. "handlers": ["default", "logfile"],
  113. "level": "INFO",
  114. "formatter": "rich",
  115. },
  116. "loggers": {
  117. "api": {
  118. "handlers": ["default", "logfile"],
  119. "level": "DEBUG",
  120. "propagate": False,
  121. },
  122. "checks": {
  123. "handlers": ["default", "logfile"],
  124. "level": "DEBUG",
  125. "propagate": False,
  126. },
  127. "core": {
  128. "handlers": ["default", "logfile"],
  129. "level": "DEBUG",
  130. "propagate": False,
  131. },
  132. "httpx": {
  133. "handlers": ["outbound_webhooks"],
  134. "level": "INFO",
  135. "formatter": "outbound_webhooks",
  136. "propagate": False,
  137. },
  138. "django": {
  139. "handlers": ["default", "logfile"],
  140. "level": "INFO",
  141. "filters": ["noisyrequestsfilter"],
  142. "propagate": False,
  143. },
  144. "django.utils.autoreload": {
  145. "propagate": False,
  146. "handlers": [],
  147. "level": "ERROR",
  148. },
  149. "django.channels.server": {
  150. # see archivebox.misc.monkey_patches.ModifiedAccessLogGenerator for dedicated daphne server logging settings
  151. "propagate": False,
  152. "handlers": ["default", "logfile"],
  153. "level": "INFO",
  154. "filters": ["noisyrequestsfilter"],
  155. },
  156. "django.server": { # logs all requests (2xx, 3xx, 4xx)
  157. "propagate": False,
  158. "handlers": ["default", "logfile"],
  159. "level": "INFO",
  160. "filters": ["noisyrequestsfilter"],
  161. },
  162. "django.request": { # only logs 4xx and 5xx errors
  163. "propagate": False,
  164. "handlers": ["default", "logfile"],
  165. "level": "ERROR",
  166. "filters": ["noisyrequestsfilter"],
  167. },
  168. "django.db.backends": {
  169. "propagate": False,
  170. "handlers": ["default"],
  171. "level": LOG_LEVEL_DATABASE,
  172. },
  173. },
  174. }