settings_logging.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. __package__ = 'archivebox.core'
  2. import re
  3. import os
  4. import shutil
  5. import tempfile
  6. import logging
  7. import pydantic
  8. import django.template
  9. from archivebox.config import CONSTANTS
  10. from archivebox.misc.logging import IS_TTY
  # URL path patterns whose request log lines are considered noise.
  # NoisyRequestsFilter embeds each pattern's source text into larger regexes,
  # so every entry must be a compiled pattern (its ``.pattern`` attribute is used).
  IGNORABLE_URL_PATTERNS = [
      re.compile(r"/.*/?apple-touch-icon.*\.png"),
      re.compile(r"/.*/?favicon\.ico"),
      re.compile(r"/.*/?robots\.txt"),
      re.compile(r"/.*/?.*\.(css|js)\.map"),   # source maps (was listed twice; one entry is enough)
      re.compile(r"/static/.*"),
      re.compile(r"/admin/jsi18n/"),
  ]
  class NoisyRequestsFilter(logging.Filter):
      """Drop log records for requests to the URLs in IGNORABLE_URL_PATTERNS.

      A record is rejected when its message starts with either

      * an access-log line for a GET request to an ignorable URL that was
        answered with a 2xx, 30x or 404 status, e.g.
        ``"GET /static/admin/js/SelectBox.js HTTP/1.1" 304 0`` or
        ``"GET /admin/jsi18n/ HTTP/1.1" 200 3352``, or
      * a ``Not Found: <url>`` message for an ignorable URL.

      Matching is case-insensitive.  Every other record is kept, e.g.
      ``"GET /api/v1/docs HTTP/1.1" 200 1023``.
      """

      # URL pattern source -> (access-log regex, "Not Found" regex).
      # Filled lazily on first use so the two regexes are assembled once per
      # URL pattern instead of once per pattern for every single log record,
      # while patterns added to IGNORABLE_URL_PATTERNS later are still picked up.
      _regex_cache = {}

      @classmethod
      def _regexes_for(cls, url_pattern):
          """Return the cached (access-log, not-found) regex pair for *url_pattern*."""
          source = url_pattern.pattern
          pair = cls._regex_cache.get(source)
          if pair is None:
              pair = (
                  re.compile(f'"GET {source} HTTP/.*" (2..|30.|404) .+$', re.I | re.M),
                  re.compile(f'Not Found: {source}', re.I | re.M),
              )
              cls._regex_cache[source] = pair
          return pair

      def filter(self, record) -> bool:
          """Return False to suppress *record*, True to let it through."""
          logline = record.getMessage()

          for url_pattern in IGNORABLE_URL_PATTERNS:
              access_log_re, not_found_re = self._regexes_for(url_pattern)
              # .match() anchors at the start of the message, as the log lines above do
              if access_log_re.match(logline) or not_found_re.match(logline):
                  return False

          return True
  class CustomOutboundWebhookLogFormatter(logging.Formatter):
      """Formatter that relabels 'HTTP Request: ' log text as 'OutboundWebhook: '."""

      def format(self, record):
          """Format *record* with the base formatter, then swap every occurrence of the label."""
          rendered = super().format(record)
          old_label, new_label = 'HTTP Request: ', 'OutboundWebhook: '
          # split + join on a non-empty separator is equivalent to str.replace
          return new_label.join(rendered.split(old_label))
  # Pick the file that the "logfile" handler writes to:
  #   - <LOGS_DIR>/errors.log when the logs dir exists and is writable
  #   - otherwise a throwaway temp file
  LOGS_DIR = CONSTANTS.LOGS_DIR
  if os.access(LOGS_DIR, os.W_OK) and LOGS_DIR.is_dir():
      ERROR_LOG = (LOGS_DIR / 'errors.log')
  else:
      # historically too many edge cases here around creating log dir w/ correct permissions early on
      # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
      #
      # delete=False keeps the file on disk after the handle is closed, so ERROR_LOG
      # names a file we actually own.  The previous `NamedTemporaryFile().name` deleted
      # the file as soon as the unreferenced handle was collected, leaving only the path
      # of a no-longer-existing file (the same race as the deprecated tempfile.mktemp).
      with tempfile.NamedTemporaryFile(delete=False) as _fallback_log:
          ERROR_LOG = _fallback_log.name
  # Verbosity knobs, kept as level-name strings.
  LOG_LEVEL_DATABASE = 'WARNING'     # change to DEBUG to log all SQL queries
  LOG_LEVEL_REQUEST = 'WARNING'      # if DEBUG else 'WARNING'

  # When SQL debugging is switched on above, attach a plain stream handler
  # to Django's database logger right away, at import time.
  if LOG_LEVEL_DATABASE == 'DEBUG':
      db_logger = logging.getLogger('django.db.backends')
      db_logger.addHandler(logging.StreamHandler())
      db_logger.setLevel(logging.DEBUG)
  # ---------------------------------------------------------------------------
  # dictConfig-style logging configuration, assembled section by section below.
  # ---------------------------------------------------------------------------

  _LOG_DATE_FORMAT = "[%Y-%m-%d %H:%M:%S]"

  # one column narrower than the terminal; (100, 10) is the fallback size
  _TRACEBACKS_WIDTH = shutil.get_terminal_size((100, 10)).columns - 1

  _MAX_LOGFILE_BYTES = 1024 * 1024 * 25   # 25 MB

  _FORMATTERS = {
      "rich": {
          "datefmt": _LOG_DATE_FORMAT,
          "format": "%(name)s %(message)s",
      },
      "outbound_webhooks": {
          "()": CustomOutboundWebhookLogFormatter,
          "datefmt": _LOG_DATE_FORMAT,
      },
  }

  _FILTERS = {
      "noisyrequestsfilter": {
          "()": NoisyRequestsFilter,
      },
      "require_debug_false": {
          "()": "django.utils.log.RequireDebugFalse",
      },
      "require_debug_true": {
          "()": "django.utils.log.RequireDebugTrue",
      },
  }

  _HANDLERS = {
      # console output
      "default": {
          "class": "rich.logging.RichHandler",
          "formatter": "rich",
          "level": "DEBUG",
          "markup": False,
          "rich_tracebacks": IS_TTY,
          "filters": ["noisyrequestsfilter"],
          "tracebacks_suppress": [
              django,
              pydantic,
          ],
          "tracebacks_width": _TRACEBACKS_WIDTH,
          "tracebacks_word_wrap": False,
          "tracebacks_show_locals": False,
      },
      # rotating file at ERROR_LOG, keeping up to 10 backups
      "logfile": {
          "level": "INFO",
          "class": "logging.handlers.RotatingFileHandler",
          "filename": ERROR_LOG,
          "maxBytes": _MAX_LOGFILE_BYTES,
          "backupCount": 10,
          "formatter": "rich",
          "filters": ["noisyrequestsfilter"],
      },
      # console output for the httpx logger, relabelled by the outbound_webhooks formatter
      "outbound_webhooks": {
          "class": "rich.logging.RichHandler",
          "markup": False,
          "rich_tracebacks": True,
          "formatter": "outbound_webhooks",
      },
      "null": {
          "class": "logging.NullHandler",
      },
  }

  # NOTE(review): the dictConfig schema for loggers/root lists only level,
  # propagate, filters and handlers, so the "formatter" keys on "root" and
  # "httpx" below look inert -- confirm before relying on them.
  _ROOT_LOGGER = {
      "handlers": ["default", "logfile"],
      "level": "INFO",
      "formatter": "rich",
  }

  _LOGGERS = {
      # project loggers: everything from DEBUG up, to console + logfile
      "api": {
          "handlers": ["default", "logfile"],
          "level": "DEBUG",
          "propagate": False,
      },
      "checks": {
          "handlers": ["default", "logfile"],
          "level": "DEBUG",
          "propagate": False,
      },
      "core": {
          "handlers": ["default", "logfile"],
          "level": "DEBUG",
          "propagate": False,
      },
      "httpx": {
          "handlers": ["outbound_webhooks"],
          "level": "INFO",
          "formatter": "outbound_webhooks",
          "propagate": False,
      },
      "django": {
          "handlers": ["default", "logfile"],
          "level": "INFO",
          "filters": ["noisyrequestsfilter"],
          "propagate": False,
      },
      # no handlers attached and no propagation
      "django.utils.autoreload": {
          "propagate": False,
          "handlers": [],
          "level": "ERROR",
      },
      "django.channels.server": {
          # see archivebox.misc.monkey_patches.ModifiedAccessLogGenerator for dedicated daphne server logging settings
          "propagate": False,
          "handlers": ["default", "logfile"],
          "level": "INFO",
          "filters": ["noisyrequestsfilter"],
      },
      "django.server": {   # logs all requests (2xx, 3xx, 4xx)
          "propagate": False,
          "handlers": ["default", "logfile"],
          "level": "INFO",
          "filters": ["noisyrequestsfilter"],
      },
      "django.request": {   # only logs 4xx and 5xx errors
          "propagate": False,
          "handlers": ["default", "logfile"],
          "level": "ERROR",
          "filters": ["noisyrequestsfilter"],
      },
      # SQL logging, console only; verbosity follows LOG_LEVEL_DATABASE
      "django.db.backends": {
          "propagate": False,
          "handlers": ["default"],
          "level": LOG_LEVEL_DATABASE,
      },
  }

  SETTINGS_LOGGING = {
      "version": 1,
      "disable_existing_loggers": False,
      "formatters": _FORMATTERS,
      "filters": _FILTERS,
      "handlers": _HANDLERS,
      "root": _ROOT_LOGGER,
      "loggers": _LOGGERS,
  }