Selaa lähdekoodia

add more explanation about snapshot.save timestamp bump

Nick Sweeting 4 vuotta sitten
vanhempi
sitoutus
084cf7ff51
1 muutettua tiedostoa jossa 8 lisäystä ja 1 poistoa
  1. 8 1
      archivebox/extractors/__init__.py

+ 8 - 1
archivebox/extractors/__init__.py

@@ -114,7 +114,14 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                     write_search_index(link=link, texts=result.index_texts)
                     write_search_index(link=link, texts=result.index_texts)
                     ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
                     ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
                                                  output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
                                                  output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
-                    snapshot.save()  # bump the updated time
+
+
+                    # Bump the updated time on the main Snapshot here. This is critical
+                    # for caching summaries of the ArchiveResults for a given
+                    # snapshot without having to load all the results from the DB each time
+                    # (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
+                    # ArchiveResults are unchanged as long as the updated timestamp is unchanged).
+                    snapshot.save()
                 else:
                 else:
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     stats['skipped'] += 1
                     stats['skipped'] += 1