Przeglądaj źródła

add ModelWithOutputDir base model to manage output directories and index writing

Nick Sweeting 1 rok temu
rodzic
commit
af21c3428b
1 zmienionych plików z 107 dodań i 2 usunięć
  1. 107 2
      archivebox/abid_utils/models.py

+ 107 - 2
archivebox/abid_utils/models.py

@@ -386,10 +386,115 @@ class ModelWithHealthStats(models.Model):
 
 
 
+class ModelWithOutputDir(ABIDModel):
+    """Abstract base model for objects that own an output directory and can write index files into it."""
+
+    class Meta:
+        abstract = True
+        
+    # Commented-out field definitions, not active (presumably planned -- confirm before enabling):
+    # output_dir = models.FilePathField(path=CONSTANTS.DATA_DIR, max_length=200, blank=True, null=True)
+    # output_files = models.JSONField(default=dict)
 
+    def save(self, *args, write_indexes=False, **kwargs) -> None:
+        """Save the model, then optionally write its on-disk indexes.
+
+        All positional and remaining keyword arguments are forwarded unchanged
+        to the parent ``save()``. ``write_indexes`` is consumed here and never
+        passed up; when truthy, ``self.write_indexes()`` runs after the parent
+        save has returned.
+        """
+        super().save(*args, **kwargs)
+        if not write_indexes:
+            return
+        self.write_indexes()
 
-
-
+    @property
+    def output_dir_type(self) -> str:
+        """Return the pluralized parent directory name for this model type, e.g. 'archiveresults'.
+
+        Uses the ``output_dir_parent`` attribute when the object has one,
+        otherwise the model name from ``self._meta``; an 's' is appended to
+        whichever is chosen.
+        """
+        # the fallback is looked up eagerly, even when an override attribute exists
+        fallback_name = self._meta.model_name
+        dir_stem = getattr(self, 'output_dir_parent', fallback_name)
+        assert dir_stem
+        return '{}s'.format(dir_stem)  # e.g. archiveresults
+    
+    @property
+    def output_dir_name(self) -> str:
+        """Return the subdirectory name for this object's data: its ABID as a string, e.g. 'snp_2342353k2jn3j32l4324'."""
+        assert self.ABID
+        dir_name = str(self.ABID)
+        return dir_name  # e.g. snp_2342353k2jn3j32l4324
+    
+    @property
+    def output_dir_str(self) -> str:
+        """Return this object's output directory as a relative '<type>/<name>' string, e.g. 'snapshots/snp_2342353k2jn3j32l4324'."""
+        type_dir = self.output_dir_type
+        object_dir = self.output_dir_name
+        return f'{type_dir}/{object_dir}'  # e.g. snapshots/snp_2342353k2jn3j32l4324
+        
+    @property
+    def OUTPUT_DIR(self) -> Path:
+        """Return the directory holding this object's data, joined onto DATA_DIR, e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')."""
+        # imported at call time rather than module level (presumably to avoid a circular import -- confirm)
+        from archivebox import DATA_DIR
+
+        relative_dir = self.output_dir_str
+        return DATA_DIR / relative_dir  # e.g. /data/snapshots/snp_2342353k2jn3j32l4324
+        
+    def write_indexes(self):
+        """Create the output dir if missing, run output dir migrations, then write each index into it.
+
+        Steps run in this order: migrations, merkle index, html index,
+        json index, symlinks index.
+        """
+        print(f'{self}.write_indexes()')
+        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+        index_steps = (
+            self.migrate_output_dir,
+            self.save_merkle_index,
+            self.save_html_index,
+            self.save_json_index,
+            self.save_symlinks_index,
+        )
+        for run_step in index_steps:
+            run_step()
+        
+    def migrate_output_dir(self):
+        """Run every per-version output dir migration in order, oldest version first."""
+        print(f'{self}.migrate_output_dir()')
+        migrations = (
+            self.migrate_from_0_7_2,
+            self.migrate_from_0_8_6,
+            # ... future migrations here
+        )
+        for migrate in migrations:
+            migrate()
+    
+    def migrate_from_0_7_2(self) -> None:
+        """Placeholder for migrating an output_dir generated by ArchiveBox <= 0.7.2; currently only logs the call."""
+        # TODO: move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
+        # TODO: update self.output_path = /data/archive/snapshots/<abid>
+        print(f'{self}.migrate_from_0_7_2()')
+        return None
+    
+    def migrate_from_0_8_6(self) -> None:
+        """Placeholder for migrating an output_dir generated by ArchiveBox <= 0.8.6; currently only logs the call."""
+        # TODO: future migration code goes here
+        print(f'{self}.migrate_from_0_8_6()')
+        return None
+
+    def save_merkle_index(self, **kwargs) -> None:
+        """Placeholder for writing the ./.index.merkle file to the output dir; currently only logs the call.
+
+        Keyword arguments are accepted but not used.
+        """
+        # TODO: write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
+        print(f'{self}.save_merkle_index()')
+        return None
+    
+    def save_html_index(self, **kwargs) -> None:
+        """Placeholder for writing the index.html file to the output dir; currently only logs the call.
+
+        Keyword arguments are accepted but not used.
+        """
+        # TODO: write self.as_html() to self.output_dir / 'index.html'
+        print(f'{self}.save_html_index()')
+        return None
+    
+    def save_json_index(self, **kwargs) -> None:
+        """Placeholder for writing the index.json file to the output dir; currently only logs the call.
+
+        Keyword arguments are accepted but not used.
+        """
+        # TODO: write self.as_json() to self.output_dir / 'index.json'
+        print(f'{self}.save_json_index()')
+        return None
+    
+    def save_symlinks_index(self) -> None:
+        """Placeholder for creating the symlink indexes that point at the output dir; currently only logs the call."""
+        # TODO, planned links:
+        # ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
+        # ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
+        # ln -s self.output_dir data/archive/1453452234234.21445
+        print(f'{self}.save_symlinks_index()')
+        return None
+
+    def as_json(self) -> dict:
+        """Collect this object's core properties into a plain dict.
+
+        ``status``, ``retry_at`` and ``notes`` are optional: each falls back to
+        None when the object has no such attribute.
+
+        NOTE(review): ``id``, ``modified_at`` and ``created_at`` are passed
+        through as-is rather than stringified -- confirm that whatever
+        serializes this dict can encode them.
+        """
+        # fields every object is expected to have
+        record = {
+            'TYPE': self.TYPE,
+            'id': self.id,
+            'abid': str(self.ABID),
+            'str': str(self),
+            'modified_at': self.modified_at,
+            'created_at': self.created_at,
+            'created_by_id': self.created_by_id,
+        }
+        # fields that only some models define
+        for optional_field in ('status', 'retry_at', 'notes'):
+            record[optional_field] = getattr(self, optional_field, None)
+        return record
+    
+    def as_html(self) -> str:
+        """Return this object rendered as an html string; currently a stub that always returns an empty string."""
+        # TODO: render snapshot_detail.html template with self as context and return html string
+        rendered_html = ''
+        return rendered_html
 
 
 ####################################################