|
|
@@ -386,10 +386,115 @@ class ModelWithHealthStats(models.Model):
|
|
|
|
|
|
|
|
|
|
|
|
class ModelWithOutputDir(ABIDModel):
    """Abstract base for models that keep their data in a per-object directory.

    The directory is derived from the model type and the object's ABID:
    ``DATA_DIR / '<model_name>s' / '<abid>'``. Subclasses may set an
    ``output_dir_parent`` attribute to override the ``<model_name>`` part.

    Most of the index writers and migrations below are placeholders that
    currently only print a trace line; the comments inside them describe the
    intended behavior.
    """

    class Meta:
        abstract = True

    # Planned fields, not enabled yet:
    # output_dir = models.FilePathField(path=CONSTANTS.DATA_DIR, max_length=200, blank=True, null=True)
    # output_files = models.JSONField(default=dict)

    def save(self, *args, write_indexes=False, **kwargs) -> None:
        """Save the object, optionally (re)writing its on-disk indexes afterwards.

        ``write_indexes`` is keyword-only and is consumed here; all other
        arguments are forwarded unchanged to the parent class's ``save()``.
        """
        super().save(*args, **kwargs)
        if write_indexes:
            self.write_indexes()

    @property
    def output_dir_type(self) -> str:
        """Get the model type parent directory name that holds this object's data e.g. 'archiveresults'"""
        parent_dir = getattr(self, 'output_dir_parent', self._meta.model_name)
        # Raised explicitly instead of via `assert` so the check still runs under
        # `python -O`; AssertionError is kept so existing handlers keep matching.
        if not parent_dir:
            raise AssertionError(
                f'{type(self).__name__} has no output_dir_parent or model_name to derive its output dir from'
            )
        return f'{parent_dir}s'  # e.g. archiveresults

    @property
    def output_dir_name(self) -> str:
        """Get the subdirectory name for the filesystem directory that holds this object's data e.g. 'snp_2342353k2jn3j32l4324'"""
        abid = self.ABID
        # Explicit raise (not `assert`): with the check stripped by `python -O`,
        # a missing ABID would be stringified into the path (e.g. 'snapshots/None').
        if not abid:
            raise AssertionError(
                f'{type(self).__name__} has no ABID to derive its output dir name from'
            )
        return str(abid)  # e.g. snp_2342353k2jn3j32l4324

    @property
    def output_dir_str(self) -> str:
        """Get the relative filesystem directory path that holds the data for this object e.g. 'snapshots/snp_2342353k2jn3j32l4324'"""
        return f'{self.output_dir_type}/{self.output_dir_name}'  # e.g. snapshots/snp_2342353k2jn3j32l4324

    @property
    def OUTPUT_DIR(self) -> Path:
        """Get the absolute filesystem directory Path that holds the data for this object e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')"""
        # Imported at call time rather than at module import time.
        from archivebox import DATA_DIR
        return DATA_DIR / self.output_dir_str  # e.g. /data/snapshots/snp_2342353k2jn3j32l4324

    def write_indexes(self):
        """Create the output dir, run the output dir migrations, then write every index.

        Order: mkdir -> migrate_output_dir -> merkle -> html -> json -> symlinks.
        """
        print(f'{self}.write_indexes()')
        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        self.migrate_output_dir()
        self.save_merkle_index()
        self.save_html_index()
        self.save_json_index()
        self.save_symlinks_index()

    def migrate_output_dir(self):
        """Move the output files to the new folder structure if needed"""
        print(f'{self}.migrate_output_dir()')
        self.migrate_from_0_7_2()
        self.migrate_from_0_8_6()
        # ... future migrations here

    def migrate_from_0_7_2(self) -> None:
        """Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
        print(f'{self}.migrate_from_0_7_2()')
        # TODO (not implemented yet):
        # move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
        # update self.output_path = /data/archive/snapshots/<abid>

    def migrate_from_0_8_6(self) -> None:
        """Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
        # ... future migration code here ...
        print(f'{self}.migrate_from_0_8_6()')

    def save_merkle_index(self, **kwargs) -> None:
        """Write the ./.index.merkle file to the output dir"""
        # TODO (not implemented yet):
        # write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
        print(f'{self}.save_merkle_index()')

    def save_html_index(self, **kwargs) -> None:
        """Write the ./index.html file to the output dir"""
        # TODO (not implemented yet):
        # write self.as_html() to self.output_dir / 'index.html'
        print(f'{self}.save_html_index()')

    def save_json_index(self, **kwargs) -> None:
        """Write the ./index.json file to the output dir"""
        print(f'{self}.save_json_index()')
        # TODO (not implemented yet):
        # write self.as_json() to self.output_dir / 'index.json'

    def save_symlinks_index(self) -> None:
        """Write the by-date / by-domain / legacy symlinks that point at the output dir"""
        print(f'{self}.save_symlinks_index()')
        # TODO (not implemented yet):
        # ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
        # ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
        # ln -s self.output_dir data/archive/1453452234234.21445

    def as_json(self) -> dict:
        """Get the object's properties as a dict

        ``status``, ``retry_at`` and ``notes`` are optional and come back as
        ``None`` when the model does not define them.
        """
        # NOTE(review): values are passed through unconverted; if id / created_at /
        # modified_at / retry_at are UUID or datetime objects, the caller needs a
        # JSON encoder that handles them -- confirm before serializing.
        return {
            'TYPE': self.TYPE,
            'id': self.id,
            'abid': str(self.ABID),
            'str': str(self),
            'modified_at': self.modified_at,
            'created_at': self.created_at,
            'created_by_id': self.created_by_id,
            'status': getattr(self, 'status', None),
            'retry_at': getattr(self, 'retry_at', None),
            'notes': getattr(self, 'notes', None),
        }

    def as_html(self) -> str:
        """Get the object's properties as a html string"""
        # TODO (not implemented yet):
        # render snapshot_detail.html template with self as context and return html string
        return ''
|
|
|
|
|
|
|
|
|
####################################################
|