Przeglądaj źródła

add note about saving timestamp strings independently

Nick Sweeting 6 lat temu
rodzic
commit
880b425df6
1 zmienionych plików z 11 dodań i 0 usunięć
  1. 11 0
      archivebox/util.py

+ 11 - 0
archivebox/util.py

@@ -373,6 +373,11 @@ def parse_date(date: Any) -> Optional[datetime]:
             # anything from hours to decades, depending on which app, OS,
             # and system time configuration was used for the original timestamp
             # more info: https://github.com/pirate/ArchiveBox/issues/119
+
+            # Note: always always always store the original timestamp string
+            # somewhere independently of the parsed datetime, so that later
+            # bugs don't repeatedly misparse and rewrite increasingly worse dates.
+            # the correct date can always be re-derived from the timestamp str
             timestamp = float(date)
 
             EARLIEST_POSSIBLE = 473403600.0  # 1985
@@ -389,6 +394,12 @@ def parse_date(date: Any) -> Optional[datetime]:
                 # number is microseconds
                 return datetime.fromtimestamp(timestamp / (1000*1000))
 
+            else:
+                # continue to the end and raise a parsing failed error.
+                # we don't want to even attempt parsing timestamp strings that
+                # aren't within these ranges
+                pass
+
         if '-' in date:
             try:
                 return datetime.fromisoformat(date)