Jelajahi Sumber

Add headers.json and fix relative singlefile path resolution for sonic

Nick Sweeting 4 tahun lalu
induk
komit
24e24934f7
2 file diubah dengan 3 penambahan dan 1 penghapusan
  1. 1 0
      archivebox/index/schema.py
  2. 2 1
      archivebox/search/utils.py

+ 1 - 0
archivebox/index/schema.py

@@ -427,6 +427,7 @@ class Link:
             'archive_org_path': 'https://web.archive.org/web/{}'.format(self.base_url),
             'archive_org_path': 'https://web.archive.org/web/{}'.format(self.base_url),
             'git_path': 'git/',
             'git_path': 'git/',
             'media_path': 'media/',
             'media_path': 'media/',
+            'headers_path': 'headers.json',
         }
         }
         if self.is_static:
         if self.is_static:
             # static binary files like PDF and images are handled slightly differently.
             # static binary files like PDF and images are handled slightly differently.

+ 2 - 1
archivebox/search/utils.py

@@ -34,10 +34,11 @@ def get_indexable_content(results: QuerySet):
         return []
         return []
     # This should come from a plugin interface
     # This should come from a plugin interface
 
 
+    # TODO: banish this duplication and get these from the extractor file
     if method == 'readability':
     if method == 'readability':
         return get_file_result_content(res, 'content.txt')
         return get_file_result_content(res, 'content.txt')
     elif method == 'singlefile':
     elif method == 'singlefile':
-        return get_file_result_content(res, '')
+        return get_file_result_content(res,'',use_pwd=True)
     elif method == 'dom':
     elif method == 'dom':
         return get_file_result_content(res,'',use_pwd=True)
         return get_file_result_content(res,'',use_pwd=True)
     elif method == 'wget':
     elif method == 'wget':