فهرست منبع

add more explanation about snapshot.save timestamp bump

Nick Sweeting 4 سال پیش
والد
کامیت
084cf7ff51
1 فایل تغییر یافته به همراه 8 افزوده شده و 1 حذف شده
  1. 8 1
      archivebox/extractors/__init__.py

+ 8 - 1
archivebox/extractors/__init__.py

@@ -114,7 +114,14 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                     write_search_index(link=link, texts=result.index_texts)
                     ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
                                                  output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
-                    snapshot.save()  # bump the updated time
+
+
+                    # bump the updated time on the main Snapshot here, this is critical
+                    # to be able to cache summaries of the ArchiveResults for a given
+                    # snapshot without having to load all the results from the DB each time.
+                    # (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
+                    # ArchiveResults are unchanged as long as the updated timestamp is unchanged)
+                    snapshot.save()
                 else:
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     stats['skipped'] += 1