tests.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. __package__ = 'abx.archivebox'  # explicit package name; Python consults __package__ when resolving relative imports (e.g. the commented-out `.toml_util` import below)
  2. # from django.test import TestCase
  3. # from .toml_util import convert, TOML_HEADER
  4. # TEST_INPUT = """
  5. # [SERVER_CONFIG]
  6. # IS_TTY=False
  7. # USE_COLOR=False
  8. # SHOW_PROGRESS=False
  9. # IN_DOCKER=False
  10. # IN_QEMU=False
  11. # PUID=501
  12. # PGID=20
  13. # CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
  14. # ONLY_NEW=True
  15. # TIMEOUT=60
  16. # MEDIA_TIMEOUT=3600
  17. # OUTPUT_PERMISSIONS=644
  18. # RESTRICT_FILE_NAMES=windows
  19. # URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
  20. # URL_ALLOWLIST=None
  21. # ADMIN_USERNAME=None
  22. # ADMIN_PASSWORD=None
  23. # ENFORCE_ATOMIC_WRITES=True
  24. # TAG_SEPARATOR_PATTERN=[,]
  25. # SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  26. # BIND_ADDR=127.0.0.1:8000
  27. # ALLOWED_HOSTS=*
  28. # DEBUG=False
  29. # PUBLIC_INDEX=True
  30. # PUBLIC_SNAPSHOTS=True
  31. # PUBLIC_ADD_VIEW=False
  32. # FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
  33. # SNAPSHOTS_PER_PAGE=40
  34. # CUSTOM_TEMPLATES_DIR=None
  35. # TIME_ZONE=UTC
  36. # TIMEZONE=UTC
  37. # REVERSE_PROXY_USER_HEADER=Remote-User
  38. # REVERSE_PROXY_WHITELIST=
  39. # LOGOUT_REDIRECT_URL=/
  40. # PREVIEW_ORIGINALS=True
  41. # LDAP=False
  42. # LDAP_SERVER_URI=None
  43. # LDAP_BIND_DN=None
  44. # LDAP_BIND_PASSWORD=None
  45. # LDAP_USER_BASE=None
  46. # LDAP_USER_FILTER=None
  47. # LDAP_USERNAME_ATTR=None
  48. # LDAP_FIRSTNAME_ATTR=None
  49. # LDAP_LASTNAME_ATTR=None
  50. # LDAP_EMAIL_ATTR=None
  51. # LDAP_CREATE_SUPERUSER=False
  52. # SAVE_TITLE=True
  53. # SAVE_FAVICON=True
  54. # SAVE_WGET=True
  55. # SAVE_WGET_REQUISITES=True
  56. # SAVE_SINGLEFILE=True
  57. # SAVE_READABILITY=True
  58. # SAVE_MERCURY=True
  59. # SAVE_HTMLTOTEXT=True
  60. # SAVE_PDF=True
  61. # SAVE_SCREENSHOT=True
  62. # SAVE_DOM=True
  63. # SAVE_HEADERS=True
  64. # SAVE_WARC=True
  65. # SAVE_GIT=True
  66. # SAVE_MEDIA=True
  67. # SAVE_ARCHIVE_DOT_ORG=True
  68. # RESOLUTION=1440,2000
  69. # GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
  70. # CHECK_SSL_VALIDITY=True
  71. # MEDIA_MAX_SIZE=750m
  72. # USER_AGENT=None
  73. # CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
  74. # WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
  75. # CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
  76. # COOKIES_FILE=None
  77. # CHROME_USER_DATA_DIR=None
  78. # CHROME_TIMEOUT=0
  79. # CHROME_HEADLESS=True
  80. # CHROME_SANDBOX=True
  81. # CHROME_EXTRA_ARGS=[]
  82. # YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
  83. # YOUTUBEDL_EXTRA_ARGS=[]
  84. # WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
  85. # WGET_EXTRA_ARGS=[]
  86. # CURL_ARGS=['--silent', '--location', '--compressed']
  87. # CURL_EXTRA_ARGS=[]
  88. # GIT_ARGS=['--recursive']
  89. # SINGLEFILE_ARGS=[]
  90. # SINGLEFILE_EXTRA_ARGS=[]
  91. # MERCURY_ARGS=['--format=text']
  92. # MERCURY_EXTRA_ARGS=[]
  93. # FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
  94. # USE_INDEXING_BACKEND=True
  95. # USE_SEARCHING_BACKEND=True
  96. # SEARCH_BACKEND_ENGINE=ripgrep
  97. # SEARCH_BACKEND_HOST_NAME=localhost
  98. # SEARCH_BACKEND_PORT=1491
  99. # SEARCH_BACKEND_PASSWORD=SecretPassword
  100. # SEARCH_PROCESS_HTML=True
  101. # SONIC_COLLECTION=archivebox
  102. # SONIC_BUCKET=snapshots
  103. # SEARCH_BACKEND_TIMEOUT=90
  104. # FTS_SEPARATE_DATABASE=True
  105. # FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
  106. # FTS_SQLITE_MAX_LENGTH=1000000000
  107. # USE_CURL=True
  108. # USE_WGET=True
  109. # USE_SINGLEFILE=True
  110. # USE_READABILITY=True
  111. # USE_MERCURY=True
  112. # USE_GIT=True
  113. # USE_CHROME=True
  114. # USE_NODE=True
  115. # USE_YOUTUBEDL=True
  116. # USE_RIPGREP=True
  117. # CURL_BINARY=curl
  118. # GIT_BINARY=git
  119. # WGET_BINARY=wget
  120. # SINGLEFILE_BINARY=single-file
  121. # READABILITY_BINARY=readability-extractor
  122. # MERCURY_BINARY=postlight-parser
  123. # YOUTUBEDL_BINARY=yt-dlp
  124. # NODE_BINARY=node
  125. # RIPGREP_BINARY=rg
  126. # CHROME_BINARY=chrome
  127. # POCKET_CONSUMER_KEY=None
  128. # USER=squash
  129. # PACKAGE_DIR=/opt/archivebox/archivebox
  130. # TEMPLATES_DIR=/opt/archivebox/archivebox/templates
  131. # ARCHIVE_DIR=/opt/archivebox/data/archive
  132. # SOURCES_DIR=/opt/archivebox/data/sources
  133. # LOGS_DIR=/opt/archivebox/data/logs
  134. # PERSONAS_DIR=/opt/archivebox/data/personas
  135. # URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
  136. # URL_ALLOWLIST_PTN=None
  137. # DIR_OUTPUT_PERMISSIONS=755
  138. # ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
  139. # VERSION=0.8.0
  140. # COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
  141. # BUILD_TIME=2024-05-15 03:28:05 1715768885
  142. # VERSIONS_AVAILABLE=None
  143. # CAN_UPGRADE=False
  144. # PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
  145. # PYTHON_VERSION=3.10.14
  146. # DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
  147. # DJANGO_VERSION=5.0.6 final (0)
  148. # SQLITE_BINARY=/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
  149. # SQLITE_VERSION=2.6.0
  150. # CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
  151. # WGET_VERSION=GNU Wget 1.24.5
  152. # WGET_AUTO_COMPRESSION=True
  153. # RIPGREP_VERSION=ripgrep 14.1.0
  154. # SINGLEFILE_VERSION=None
  155. # READABILITY_VERSION=None
  156. # MERCURY_VERSION=None
  157. # GIT_VERSION=git version 2.44.0
  158. # YOUTUBEDL_VERSION=2024.04.09
  159. # CHROME_VERSION=Google Chrome 124.0.6367.207
  160. # NODE_VERSION=v21.7.3
  161. # """
  162. # EXPECTED_OUTPUT = TOML_HEADER + '''[SERVER_CONFIG]
  163. # IS_TTY = false
  164. # USE_COLOR = false
  165. # SHOW_PROGRESS = false
  166. # IN_DOCKER = false
  167. # IN_QEMU = false
  168. # PUID = 501
  169. # PGID = 20
  170. # CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
  171. # ONLY_NEW = true
  172. # TIMEOUT = 60
  173. # MEDIA_TIMEOUT = 3600
  174. # OUTPUT_PERMISSIONS = 644
  175. # RESTRICT_FILE_NAMES = "windows"
  176. # URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
  177. # URL_ALLOWLIST = null
  178. # ADMIN_USERNAME = null
  179. # ADMIN_PASSWORD = null
  180. # ENFORCE_ATOMIC_WRITES = true
  181. # TAG_SEPARATOR_PATTERN = "[,]"
  182. # SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  183. # BIND_ADDR = "127.0.0.1:8000"
  184. # ALLOWED_HOSTS = "*"
  185. # DEBUG = false
  186. # PUBLIC_INDEX = true
  187. # PUBLIC_SNAPSHOTS = true
  188. # PUBLIC_ADD_VIEW = false
  189. # FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
  190. # SNAPSHOTS_PER_PAGE = 40
  191. # CUSTOM_TEMPLATES_DIR = null
  192. # TIME_ZONE = "UTC"
  193. # TIMEZONE = "UTC"
  194. # REVERSE_PROXY_USER_HEADER = "Remote-User"
  195. # REVERSE_PROXY_WHITELIST = ""
  196. # LOGOUT_REDIRECT_URL = "/"
  197. # PREVIEW_ORIGINALS = true
  198. # LDAP = false
  199. # LDAP_SERVER_URI = null
  200. # LDAP_BIND_DN = null
  201. # LDAP_BIND_PASSWORD = null
  202. # LDAP_USER_BASE = null
  203. # LDAP_USER_FILTER = null
  204. # LDAP_USERNAME_ATTR = null
  205. # LDAP_FIRSTNAME_ATTR = null
  206. # LDAP_LASTNAME_ATTR = null
  207. # LDAP_EMAIL_ATTR = null
  208. # LDAP_CREATE_SUPERUSER = false
  209. # SAVE_TITLE = true
  210. # SAVE_FAVICON = true
  211. # SAVE_WGET = true
  212. # SAVE_WGET_REQUISITES = true
  213. # SAVE_SINGLEFILE = true
  214. # SAVE_READABILITY = true
  215. # SAVE_MERCURY = true
  216. # SAVE_HTMLTOTEXT = true
  217. # SAVE_PDF = true
  218. # SAVE_SCREENSHOT = true
  219. # SAVE_DOM = true
  220. # SAVE_HEADERS = true
  221. # SAVE_WARC = true
  222. # SAVE_GIT = true
  223. # SAVE_MEDIA = true
  224. # SAVE_ARCHIVE_DOT_ORG = true
  225. # RESOLUTION = [1440, 2000]
  226. # GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
  227. # CHECK_SSL_VALIDITY = true
  228. # MEDIA_MAX_SIZE = "750m"
  229. # USER_AGENT = null
  230. # CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
  231. # WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
  232. # CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
  233. # COOKIES_FILE = null
  234. # CHROME_USER_DATA_DIR = null
  235. # CHROME_TIMEOUT = false
  236. # CHROME_HEADLESS = true
  237. # CHROME_SANDBOX = true
  238. # CHROME_EXTRA_ARGS = []
  239. # YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
  240. # YOUTUBEDL_EXTRA_ARGS = []
  241. # WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
  242. # WGET_EXTRA_ARGS = []
  243. # CURL_ARGS = ["--silent", "--location", "--compressed"]
  244. # CURL_EXTRA_ARGS = []
  245. # GIT_ARGS = ["--recursive"]
  246. # SINGLEFILE_ARGS = []
  247. # SINGLEFILE_EXTRA_ARGS = []
  248. # MERCURY_ARGS = ["--format=text"]
  249. # MERCURY_EXTRA_ARGS = []
  250. # FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
  251. # USE_INDEXING_BACKEND = true
  252. # USE_SEARCHING_BACKEND = true
  253. # SEARCH_BACKEND_ENGINE = "ripgrep"
  254. # SEARCH_BACKEND_HOST_NAME = "localhost"
  255. # SEARCH_BACKEND_PORT = 1491
  256. # SEARCH_BACKEND_PASSWORD = "SecretPassword"
  257. # SEARCH_PROCESS_HTML = true
  258. # SONIC_COLLECTION = "archivebox"
  259. # SONIC_BUCKET = "snapshots"
  260. # SEARCH_BACKEND_TIMEOUT = 90
  261. # FTS_SEPARATE_DATABASE = true
  262. # FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
  263. # FTS_SQLITE_MAX_LENGTH = 1000000000
  264. # USE_CURL = true
  265. # USE_WGET = true
  266. # USE_SINGLEFILE = true
  267. # USE_READABILITY = true
  268. # USE_MERCURY = true
  269. # USE_GIT = true
  270. # USE_CHROME = true
  271. # USE_NODE = true
  272. # USE_YOUTUBEDL = true
  273. # USE_RIPGREP = true
  274. # CURL_BINARY = "curl"
  275. # GIT_BINARY = "git"
  276. # WGET_BINARY = "wget"
  277. # SINGLEFILE_BINARY = "single-file"
  278. # READABILITY_BINARY = "readability-extractor"
  279. # MERCURY_BINARY = "postlight-parser"
  280. # YOUTUBEDL_BINARY = "yt-dlp"
  281. # NODE_BINARY = "node"
  282. # RIPGREP_BINARY = "rg"
  283. # CHROME_BINARY = "chrome"
  284. # POCKET_CONSUMER_KEY = null
  285. # USER = "squash"
  286. # PACKAGE_DIR = "/opt/archivebox/archivebox"
  287. # TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
  288. # ARCHIVE_DIR = "/opt/archivebox/data/archive"
  289. # SOURCES_DIR = "/opt/archivebox/data/sources"
  290. # LOGS_DIR = "/opt/archivebox/data/logs"
  291. # PERSONAS_DIR = "/opt/archivebox/data/personas"
  292. # URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
  293. # URL_ALLOWLIST_PTN = null
  294. # DIR_OUTPUT_PERMISSIONS = 755
  295. # ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
  296. # VERSION = "0.8.0"
  297. # COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
  298. # BUILD_TIME = "2024-05-15 03:28:05 1715768885"
  299. # VERSIONS_AVAILABLE = null
  300. # CAN_UPGRADE = false
  301. # PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
  302. # PYTHON_VERSION = "3.10.14"
  303. # DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
  304. # DJANGO_VERSION = "5.0.6 final (0)"
  305. # SQLITE_BINARY = "/opt/homebrew/Cellar/[email protected]/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
  306. # SQLITE_VERSION = "2.6.0"
  307. # CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
  308. # WGET_VERSION = "GNU Wget 1.24.5"
  309. # WGET_AUTO_COMPRESSION = true
  310. # RIPGREP_VERSION = "ripgrep 14.1.0"
  311. # SINGLEFILE_VERSION = null
  312. # READABILITY_VERSION = null
  313. # MERCURY_VERSION = null
  314. # GIT_VERSION = "git version 2.44.0"
  315. # YOUTUBEDL_VERSION = "2024.04.09"
  316. # CHROME_VERSION = "Google Chrome 124.0.6367.207"
  317. # NODE_VERSION = "v21.7.3"'''
  318. # class IniToTomlTests(TestCase):
  319. # def test_convert(self):
  320. # first_output = convert(TEST_INPUT) # make sure ini -> toml parses correctly
  321. # second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
  322. # assert first_output == second_output == EXPECTED_OUTPUT # make sure parsing is idempotent
  323. # # DEBUGGING
  324. # import sys
  325. # import difflib
  326. # sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
  327. # print(repr(second_output))