Ver Fonte

add new InstalledBinary model to cache binaries on host machine

Nick Sweeting há 1 ano atrás
pai
commit
e315905721

+ 14 - 6
archivebox/abx/archivebox/base_binary.py

@@ -14,9 +14,9 @@ from pydantic_pkgr import (
     EnvProvider,
 )
 
-import abx
-
 from archivebox.config import CONSTANTS
+
+import abx
 from .base_hook import BaseHook, HookType
 
 
@@ -92,9 +92,17 @@ class BaseBinary(BaseHook, Binary):
     @abx.hookimpl
     def get_BINARIES(self):
         return [self]
-    
 
 
-apt = AptProvider()
-brew = BrewProvider()
-env = EnvProvider()
+class AptBinProvider(AptProvider, BaseBinProvider):
+    name: BinProviderName = "apt"
+    
+class BrewBinProvider(BrewProvider, BaseBinProvider):
+    name: BinProviderName = "brew"
+    
+class EnvBinProvider(EnvProvider, BaseBinProvider):
+    name: BinProviderName = "env"
+
+apt = AptBinProvider()
+brew = BrewBinProvider()
+env = EnvBinProvider()

+ 103 - 31
archivebox/abx/archivebox/base_extractor.py

@@ -1,14 +1,15 @@
 __package__ = 'abx.archivebox'
 
 import json
-import socket
-from typing import Optional, List, Literal, Annotated, Dict, Any
+
+from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
 from typing_extensions import Self
 from pathlib import Path
 
 from pydantic import model_validator, AfterValidator
 from pydantic_pkgr import BinName
 from django.utils.functional import cached_property
+from django.utils import timezone
 
 import abx
 
@@ -23,7 +24,7 @@ def no_empty_args(args: List[str]) -> List[str]:
 ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
 
 HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
-CmdArgsList = Annotated[List[str], AfterValidator(no_empty_args)]
+CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
 
 
 class BaseExtractor(BaseHook):
@@ -53,8 +54,9 @@ class BaseExtractor(BaseHook):
 
     def should_extract(self, snapshot) -> bool:
         try:
-            assert self.BIN.version
+            assert self.detect_installed_binary().version
         except Exception:
+            raise
             # could not load binary
             return False
         
@@ -66,19 +68,32 @@ class BaseExtractor(BaseHook):
     @abx.hookimpl
     def extract(self, snapshot_id: str) -> Dict[str, Any]:
         from core.models import Snapshot
+        from archivebox import CONSTANTS
+        
         snapshot = Snapshot.objects.get(id=snapshot_id)
         
         if not self.should_extract(snapshot):
             return {}
         
-        from archivebox import CONSTANTS
+        status = 'failed'
+        start_ts = timezone.now()
+        uplink = self.detect_network_interface()
+        installed_binary = self.detect_installed_binary()
+        machine = installed_binary.machine
+        assert uplink.machine == installed_binary.machine  # it would be *very* weird if this wasn't true
+        
         # output_dir = self.get_output_path(snapshot) or CONSTANTS.TMP_DIR
         output_dir = CONSTANTS.TMP_DIR / 'test'
         output_dir.mkdir(parents=True, exist_ok=True)
 
-        cmd = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
-        proc = self.exec(cmd, cwd=output_dir)
+        # execute the extractor binary with the given args
+        args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
+        cmd = [str(installed_binary.abspath), *args]
+        proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)
 
+        # collect the output
+        end_ts = timezone.now()
+        output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
         stdout = proc.stdout.strip()
         stderr = proc.stderr.strip()
         output_json = None
@@ -90,59 +105,116 @@ class BaseExtractor(BaseHook):
             pass
         
         errors = []
-        if proc.returncode != 0:
-            errors.append(f'{self.BIN.name} returned non-zero exit code: {proc.returncode}')   
-
-        # pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7
-        binary_str = f'{self.BIN.abspath}@{self.BIN.binprovider.name}:{self.BIN.binprovider.get_packages(self.BIN.name)}=={self.BIN.version}'
+        if proc.returncode == 0:
+            status = 'success'
+        else:
+            errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')   
+
+        # increment health stats counters
+        if status == 'success':
+            machine.record_health_success()
+            uplink.record_health_success()
+            installed_binary.record_health_success()
+        else:
+            machine.record_health_failure()
+            uplink.record_health_failure()
+            installed_binary.record_health_failure()
 
         return {
             'extractor': self.name,
             
-            'snapshot_id': snapshot.id,
-            'snapshot_abid': snapshot.abid,
-            'snapshot_url': snapshot.url,
-            'snapshot_created_by_id': snapshot.created_by_id,
+            'snapshot': {
+                'id': snapshot.id,
+                'abid': snapshot.abid,
+                'url': snapshot.url,
+                'created_by_id': snapshot.created_by_id,
+            },
             
-            'hostname': socket.gethostname(),
+            'machine': {
+                'id': machine.id,
+                'abid': machine.abid,
+                'guid': machine.guid,
+                'hostname': machine.hostname,
+                'hw_in_docker': machine.hw_in_docker,
+                'hw_in_vm': machine.hw_in_vm,
+                'hw_manufacturer': machine.hw_manufacturer,
+                'hw_product': machine.hw_product,
+                'hw_uuid': machine.hw_uuid,
+                'os_arch': machine.os_arch,
+                'os_family': machine.os_family,
+                'os_platform': machine.os_platform,
+                'os_release': machine.os_release,
+                'os_kernel': machine.os_kernel,
+            },
             
-            'binary': binary_str,
-            'binary_name': self.BIN.name,
-            'binary_provider': self.BIN.binprovider.name,
-            'binary_version': self.BIN.version,
-            'binary_abspath': self.BIN.abspath,
+            'uplink': { 
+                'id': uplink.id,
+                'abid': uplink.abid,
+                'mac_address': uplink.mac_address,
+                'ip_public': uplink.ip_public,
+                'ip_local': uplink.ip_local,
+                'dns_server': uplink.dns_server,
+                'hostname': uplink.hostname,
+                'iface': uplink.iface,
+                'isp': uplink.isp,
+                'city': uplink.city,
+                'region': uplink.region,
+                'country': uplink.country,
+            },
             
+            'binary': {
+                'id': installed_binary.id,
+                'abid': installed_binary.abid,
+                'name': installed_binary.name,
+                'binprovider': installed_binary.binprovider,
+                'abspath': installed_binary.abspath,
+                'version': installed_binary.version,
+                'sha256': installed_binary.sha256,
+            },
+
             'cmd': cmd,
             'stdout': stdout,
             'stderr': stderr,
             'returncode': proc.returncode,
+            'start_ts': start_ts,
+            'end_ts': end_ts,
             
-            'status': 'succeeded' if proc.returncode == 0 else 'failed',
+            'status': status,
             'errors': errors,
             'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
-            'output_files': list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*')),
+            'output_files': output_files,
             'output_json': output_json or {},
             'output_text': output_text or '',
         }
 
     # TODO: move this to a hookimpl
-    def exec(self, args: CmdArgsList, cwd: Optional[Path]=None, binary=None):
+    def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
         cwd = cwd or Path('.')
-        binary = (binary or self.BINARY).load()
+        binary = self.load_binary(installed_binary=installed_binary)
         
         return binary.exec(cmd=args, cwd=cwd)
     
     @cached_property
     def BINARY(self) -> BaseBinary:
-        from django.conf import settings
-        for binary in settings.BINARIES.values():
+        import abx.archivebox.use
+        for binary in abx.archivebox.use.get_BINARIES().values():
             if binary.name == self.binary:
                 return binary
         raise ValueError(f'Binary {self.binary} not found')
     
-    @cached_property
-    def BIN(self) -> BaseBinary:
-        return self.BINARY.load()
+    def detect_installed_binary(self):
+        from machine.models import InstalledBinary
+        # hydrates binary from DB/cache if record of installed version is recent enough
+        # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
+        return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)
+
+    def load_binary(self, installed_binary=None) -> BaseBinary:
+        installed_binary = installed_binary or self.detect_installed_binary()
+        return installed_binary.load_from_db()
+    
+    def detect_network_interface(self):
+        from machine.models import NetworkInterface
+        return NetworkInterface.objects.current()
 
     @abx.hookimpl
     def get_EXTRACTORS(self):

+ 5 - 1
archivebox/abx/archivebox/use.py

@@ -46,9 +46,13 @@ def get_FLAT_CONFIG() -> Dict[str, Any]:
     })
 
 def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
+    # TODO: move these to plugins
+    from abx.archivebox.base_binary import apt, brew, env
+    builtin_binproviders = [apt, brew, env]
+    
     return benedict({
         binprovider.id: binprovider
-        for plugin_binproviders in pm.hook.get_BINPROVIDERS()
+        for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()]
             for binprovider in plugin_binproviders
     })
 

+ 0 - 1
archivebox/config/apps.py

@@ -38,7 +38,6 @@ class ConfigPlugin(BasePlugin):
         ARCHIVING_CONFIG,
         SEARCH_BACKEND_CONFIG,
     ]
-    
 
 
 PLUGIN = ConfigPlugin()

+ 6 - 3
archivebox/config/views.py

@@ -16,6 +16,8 @@ from admin_data_views.utils import render_with_table_view, render_with_item_view
 from archivebox.config import CONSTANTS
 from archivebox.misc.util import parse_date
 
+from machine.models import InstalledBinary
+
 
 def obj_to_yaml(obj: Any, indent: int=0) -> str:
     indent_str = "  " * indent
@@ -64,7 +66,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
     rows = {
-        "Binary": [],
+        "Binary Name": [],
         "Found Version": [],
         "From Plugin": [],
         "Provided By": [],
@@ -83,11 +85,12 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
     for plugin in settings.PLUGINS.values():
         for binary in plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
             try:
-                binary = binary.load()
+                installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
+                binary = installed_binary.load_from_db()
             except Exception as e:
                 print(e)
 
-            rows['Binary'].append(ItemLink(binary.name, key=binary.name))
+            rows['Binary Name'].append(ItemLink(binary.name, key=binary.name))
             rows['Found Version'].append(f'✅ {binary.loaded_version}' if binary.loaded_version else '❌ missing')
             rows['From Plugin'].append(plugin.plugin_module)
             rows['Provided By'].append(

+ 27 - 1
archivebox/core/admin.py

@@ -29,7 +29,7 @@ from core.mixins import SearchResultsAdminMixin
 from api.models import APIToken
 from abid_utils.admin import ABIDModelAdmin
 from queues.tasks import bg_archive_links, bg_add
-from machine.models import Machine, NetworkInterface
+from machine.models import Machine, NetworkInterface, InstalledBinary
 
 from index.html import snapshot_icons
 from logging_util import printable_filesize
@@ -829,3 +829,29 @@ class NetworkInterfaceAdmin(ABIDModelAdmin):
             iface.machine.abid,
             iface.machine.hostname,
         )
+
+@admin.register(InstalledBinary, site=archivebox_admin)
+class InstalledBinaryAdmin(ABIDModelAdmin):
+    list_display = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
+    sort_fields = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
+    search_fields = ('abid', 'machine__abid', 'name', 'binprovider', 'version', 'abspath', 'sha256')
+    
+    readonly_fields = ('created_at', 'modified_at', 'abid_info')
+    fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
+
+    list_filter = ('name', 'binprovider', 'machine_id')
+    ordering = ['-created_at']
+    list_per_page = 100
+    actions = ["delete_selected"]
+
+    @admin.display(
+        description='Machine',
+        ordering='machine__abid',
+    )
+    def machine_info(self, installed_binary):
+        return format_html(
+            '<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
+            installed_binary.machine.id,
+            installed_binary.machine.abid,
+            installed_binary.machine.hostname,
+        )

+ 8 - 0
archivebox/core/models.py

@@ -22,6 +22,7 @@ from archivebox.config import CONSTANTS
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
 from queues.tasks import bg_archive_snapshot
+from machine.models import Machine, NetworkInterface
 
 from archivebox.misc.system import get_dir_size
 from archivebox.misc.util import parse_date, base_url
@@ -545,6 +546,9 @@ class ArchiveResult(ABIDModel):
     end_ts = models.DateTimeField()
     status = models.CharField(max_length=16, choices=STATUS_CHOICES)
 
+    # the network interface that was used to download this result
+    # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
+
     objects = ArchiveResultManager()
 
     class Meta(TypedModelMeta):
@@ -556,6 +560,10 @@ class ArchiveResult(ABIDModel):
         # return f'[{self.abid}] 📅 {self.start_ts.strftime("%Y-%m-%d %H:%M")} 📄 {self.extractor} {self.snapshot.url}'
         return self.extractor
 
+    @cached_property
+    def machine(self):
+        return self.iface.machine if self.iface else None
+
     @cached_property
     def snapshot_dir(self):
         return Path(self.snapshot.link_dir)

+ 1 - 1
archivebox/core/settings.py

@@ -480,7 +480,7 @@ ADMIN_DATA_VIEWS = {
         {
             "route": "binaries/",
             "view": "archivebox.config.views.binaries_list_view",
-            "name": "Binaries",
+            "name": "Dependencies",
             "items": {
                 "route": "<str:key>/",
                 "view": "archivebox.config.views.binary_detail_view",

+ 167 - 23
archivebox/machine/models.py

@@ -124,44 +124,188 @@ class NetworkInterface(ABIDModel):
     dns_server = models.GenericIPAddressField(default=None, null=False, editable=False)       # e.g. 8.8.8.8         or 2001:0db8:85a3:0000:0000:8a2e:0370:7334
     
     # MUTABLE PROPERTIES
-    iface = models.CharField(max_length=15, default=None, null=False)                         # e.g. en0
     hostname = models.CharField(max_length=63, default=None, null=False)                      # e.g. somehost.sub.example.com
+    iface = models.CharField(max_length=15, default=None, null=False)                         # e.g. en0
     isp = models.CharField(max_length=63, default=None, null=False)                           # e.g. AS-SONICTELECOM
     city = models.CharField(max_length=63, default=None, null=False)                          # e.g. Berkeley
     region = models.CharField(max_length=63, default=None, null=False)                        # e.g. California
     country = models.CharField(max_length=63, default=None, null=False)                       # e.g. United States
 
-    objects = NetworkInterfaceManager()
+    # STATS COUNTERS (from ModelWithHealthStats)
+    # num_uses_failed = models.PositiveIntegerField(default=0)
+    # num_uses_succeeded = models.PositiveIntegerField(default=0)
+
+    objects: NetworkInterfaceManager = NetworkInterfaceManager()
     
     class Meta:
         unique_together = (
+            # if *any* of these change, it's considered a different interface
+            # because we might get different downloaded content as a result,
+            # this forces us to store an audit trail whenever these things change
             ('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),
         )
+
+
+class InstalledBinaryManager(models.Manager):
+    def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary':
+        """Get or create an InstalledBinary record for a Binary on the local machine"""
         
+        global CURRENT_BINARIES
+        cached_binary = CURRENT_BINARIES.get(binary.id)
+        if cached_binary:
+            expires_at = cached_binary.modified_at + timedelta(seconds=INSTALLED_BINARY_RECHECK_INTERVAL)
+            if timezone.now() < expires_at:
+                is_loaded = binary.abspath and binary.version and binary.sha256
+                if is_loaded:
+                    # if the caller already did the (expensive) job of loading the binary from the filesystem
+                    # then their in-memory version is certainly more up-to-date than any potential cached version
+                    # use this opportunity to invalidate the cache in case anything has changed
+                    is_different_from_cache = (
+                        binary.abspath != cached_binary.abspath
+                        or binary.version != cached_binary.version
+                        or binary.sha256 != cached_binary.sha256
+                    )
+                    if is_different_from_cache:
+                        CURRENT_BINARIES.pop(binary.id)
+                    else:
+                        return cached_binary
+                else:
+                    # if they have not yet loaded the binary
+                    # but our cache is recent enough and not expired, assume cached version is good enough
+                    # it will automatically reload when the cache expires
+                    # cached_binary will be stale/bad for up to 30min if binary was updated/removed on host system
+                    return cached_binary
+            else:
+                # cached binary is too old, reload it from scratch
+                CURRENT_BINARIES.pop(binary.id)
+        
+        if not binary.abspath or not binary.version or not binary.sha256:
+            # if binary was not yet loaded from filesystem, do it now
+            # this is expensive, we have to find its abspath, version, and sha256, but it's necessary
+            # to make sure we have a good, up-to-date record of it in the DB & in-memory cache
+            binary = binary.load()
 
-# class InstalledBinary(ABIDModel):
-#     abid_prefix = 'bin_'
-#     abid_ts_src = 'self.machine.created_at'
-#     abid_uri_src = 'self.machine.guid'
-#     abid_subtype_src = 'self.binprovider'
-#     abid_rand_src = 'self.id'
-#     abid_drift_allowed = False
+        assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
+        
+        CURRENT_BINARIES[binary.id], _created = self.update_or_create(
+            machine=Machine.objects.current(),
+            name=binary.name,
+            binprovider=binary.loaded_binprovider.name,
+            version=str(binary.loaded_version),
+            abspath=str(binary.loaded_abspath),
+            sha256=str(binary.loaded_sha256),
+        )
+        cached_binary = CURRENT_BINARIES[binary.id]
+        cached_binary.save()   # populate ABID
+        
+        # if we get this far make sure DB record matches in-memory cache
+        assert str(cached_binary.binprovider) == str(binary.loaded_binprovider.name)
+        assert str(cached_binary.abspath) == str(binary.loaded_abspath)
+        assert str(cached_binary.version) == str(binary.loaded_version)
+        assert str(cached_binary.sha256) == str(binary.loaded_sha256)
+        
+        return cached_binary
     
-#     id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
-#     abid = ABIDField(prefix=abid_prefix)
 
-#     created_at = AutoDateTimeField(default=None, null=False, db_index=True)
-#     modified_at = models.DateTimeField(auto_now=True)
+
+class InstalledBinary(ABIDModel, ModelWithHealthStats):
+    abid_prefix = 'bin_'
+    abid_ts_src = 'self.machine.created_at'
+    abid_uri_src = 'self.machine.guid'
+    abid_subtype_src = 'self.binprovider'
+    abid_rand_src = 'self.id'
+    abid_drift_allowed = False
     
-#     machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False)
-#     binprovider = models.CharField(max_length=255, default=None, null=False)
+    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
+    abid = ABIDField(prefix=abid_prefix)
+
+    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
+    modified_at = models.DateTimeField(auto_now=True)
+    
+    # IMMUTABLE PROPERTIES
+    machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False, blank=True)
+    name = models.CharField(max_length=63, default=None, null=False, blank=True)
+    binprovider = models.CharField(max_length=31, default=None, null=False, blank=True)
+    abspath = models.CharField(max_length=255, default=None, null=False, blank=True)
+    version = models.CharField(max_length=32, default=None, null=False, blank=True)
+    sha256 = models.CharField(max_length=64, default=None, null=False, blank=True)
+    
+    # MUTABLE PROPERTIES
+    # is_pinned = models.BooleanField(default=False)    # i.e. should this binary supersede other binaries with the same name on the host?
+    # is_valid = models.BooleanField(default=True)      # i.e. is this binary still available on the host?
     
-#     name = models.CharField(max_length=255, default=None, null=False)
-#     version = models.CharField(max_length=255, default=None, null=False)
-#     abspath = models.CharField(max_length=255, default=None, null=False)
-#     sha256 = models.CharField(max_length=255, default=None, null=False)
+    # STATS COUNTERS (from ModelWithHealthStats)
+    # num_uses_failed = models.PositiveIntegerField(default=0)
+    # num_uses_succeeded = models.PositiveIntegerField(default=0)
+    
+    objects: InstalledBinaryManager = InstalledBinaryManager()
+    
+    class Meta:
+        verbose_name = 'Installed Binary'
+        verbose_name_plural = 'Installed Binaries'
+        unique_together = (
+            ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256'),
+        )
+
+    def __str__(self) -> str:
+        return f'{self.name}@{self.binprovider}+{self.abspath}@{self.version}'
     
-#     class Meta:
-#         unique_together = (
-#             ('machine', 'binprovider', 'version', 'abspath', 'sha256'),
-#         )
+    def clean(self, *args, **kwargs) -> None:
+        assert self.name or self.abspath
+        self.name = str(self.name or self.abspath)
+        assert self.name
+
+        if not hasattr(self, 'machine'):
+            self.machine = Machine.objects.current()
+        if not self.binprovider:
+            all_known_binproviders = list(abx.archivebox.use.get_BINPROVIDERS().values())
+            binary = Binary(name=self.name, binproviders=all_known_binproviders).load()
+            self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
+        if not self.abspath:
+            self.abspath = self.BINPROVIDER.get_abspath(self.name)
+        if not self.version:
+            self.version = self.BINPROVIDER.get_version(self.name, abspath=self.abspath)
+        if not self.sha256:
+            self.sha256 = self.BINPROVIDER.get_sha256(self.name, abspath=self.abspath)
+            
+        super().clean(*args, **kwargs)
+
+    @cached_property
+    def BINARY(self) -> BaseBinary:
+        for binary in abx.archivebox.use.get_BINARIES().values():
+            if binary.name == self.name:
+                return binary
+        raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
+        # TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
+
+    @cached_property
+    def BINPROVIDER(self) -> BaseBinProvider:
+        for binprovider in abx.archivebox.use.get_BINPROVIDERS().values():
+            if binprovider.name == self.binprovider:
+                return binprovider
+        raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
+
+    # maybe not a good idea to provide this? Binary in DB is a record of the binary's config
+    # whereas a loaded binary is a not-yet saved instance that may not have the same config
+    # why would we want to load a binary record from the db when it could be freshly loaded?
+    def load_from_db(self) -> BaseBinary:
+        # TODO: implement defaults arg in pydantic_pkgr
+        # return self.BINARY.load(defaults={
+        #     'binprovider': self.BINPROVIDER,
+        #     'abspath': Path(self.abspath),
+        #     'version': self.version,
+        #     'sha256': self.sha256,
+        # })
+        
+        return BaseBinary.model_validate({
+            **self.BINARY.model_dump(),
+            'abspath': self.abspath and Path(self.abspath),
+            'version': self.version,
+            'sha256': self.sha256,
+            'loaded_binprovider': self.BINPROVIDER,
+            'binproviders_supported': self.BINARY.binproviders_supported,
+            'provider_overrides': self.BINARY.provider_overrides,
+        })
+
+    def load_fresh(self) -> BaseBinary:
+        return self.BINARY.load()