archive.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. # archivebox_api.py
  2. from typing import List, Optional
  3. from enum import Enum
  4. from pydantic import BaseModel
  5. from ninja import Router
  6. from main import (
  7. add,
  8. remove,
  9. update,
  10. list_all,
  11. ONLY_NEW,
  12. ) # Assuming these functions are defined in main.py
  13. # Schemas
  14. class StatusChoices(str, Enum):
  15. indexed = 'indexed'
  16. archived = 'archived'
  17. unarchived = 'unarchived'
  18. present = 'present'
  19. valid = 'valid'
  20. invalid = 'invalid'
  21. duplicate = 'duplicate'
  22. orphaned = 'orphaned'
  23. corrupted = 'corrupted'
  24. unrecognized = 'unrecognized'
class AddURLSchema(BaseModel):
    """Request body accepted by the ``/add`` endpoint.

    Every field is forwarded unchanged, as a keyword argument of the same
    name, to ``add`` imported from ``main``; the meaning of each option is
    defined there.
    """

    # The only required field; every other field has a default.
    urls: List[str]
    tag: str = ""
    depth: int = 0
    # Evaluated once, when the class body runs at import time.
    update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW
    update_all: bool = False
    index_only: bool = False
    overwrite: bool = False
    init: bool = False
    extractors: str = ""
    parser: str = "auto"
class RemoveURLSchema(BaseModel):
    """Request body accepted by the ``/remove`` endpoint.

    All fields are optional and are forwarded unchanged, as keyword arguments
    of the same name, to ``remove`` imported from ``main``.
    """

    yes: bool = False
    delete: bool = False
    # NOTE(review): presumably numeric timestamps bounding the selection — confirm against main.remove.
    before: Optional[float] = None
    after: Optional[float] = None
    filter_type: str = "exact"
    filter_patterns: Optional[List[str]] = None
class UpdateSchema(BaseModel):
    """Request body accepted by the ``/update`` endpoint.

    All fields are optional and are forwarded unchanged, as keyword arguments
    of the same name, to ``update`` imported from ``main``.
    """

    resume: Optional[float] = None
    # Defaults to None rather than a boolean, so "not supplied" is passed through as None.
    only_new: Optional[bool] = None
    index_only: Optional[bool] = False
    overwrite: Optional[bool] = False
    # NOTE(review): presumably numeric timestamps bounding the selection — confirm against main.update.
    before: Optional[float] = None
    after: Optional[float] = None
    # Restricted to the StatusChoices values when provided.
    status: Optional[StatusChoices] = None
    filter_type: Optional[str] = 'exact'
    filter_patterns: Optional[List[str]] = None
    extractors: Optional[str] = ""
class ListAllSchema(BaseModel):
    """Request body accepted by the ``/list_all`` endpoint.

    All fields are optional and are forwarded unchanged, as keyword arguments
    of the same name, to ``list_all`` imported from ``main``.
    """

    filter_patterns: Optional[List[str]] = None
    filter_type: str = 'exact'
    # Restricted to the StatusChoices values when provided.
    status: Optional[StatusChoices] = None
    # NOTE(review): presumably numeric timestamps bounding the selection — confirm against main.list_all.
    after: Optional[float] = None
    before: Optional[float] = None
    sort: Optional[str] = None
    # csv, json and html also act as output-format selectors: the endpoint
    # reports the first truthy one, checked in the order json, html, csv.
    # Note that csv is an optional string while json and html are booleans.
    csv: Optional[str] = None
    # NOTE(review): `json` is also the name of a method on pydantic's
    # BaseModel; depending on the pydantic version this field name may be
    # rejected at class creation or trigger a shadowing warning — confirm.
    json: bool = False
    html: bool = False
    with_headers: bool = False
# API Router
# Module-level ninja Router; the endpoint functions in this file register
# themselves on it through the @router.post decorator.
router = Router()
  67. @router.post("/add", response={200: dict})
  68. def api_add(request, payload: AddURLSchema):
  69. try:
  70. result = add(
  71. urls=payload.urls,
  72. tag=payload.tag,
  73. depth=payload.depth,
  74. update=payload.update,
  75. update_all=payload.update_all,
  76. index_only=payload.index_only,
  77. overwrite=payload.overwrite,
  78. init=payload.init,
  79. extractors=payload.extractors,
  80. parser=payload.parser,
  81. )
  82. # Currently the add function returns a list of ALL items in the DB, ideally only return new items
  83. return {
  84. "status": "success",
  85. "message": "URLs added successfully.",
  86. "result": str(result),
  87. }
  88. except Exception as e:
  89. # Handle exceptions raised by the add function or during processing
  90. return {"status": "error", "message": str(e)}
  91. @router.post("/remove", response={200: dict})
  92. def api_remove(request, payload: RemoveURLSchema):
  93. try:
  94. result = remove(
  95. yes=payload.yes,
  96. delete=payload.delete,
  97. before=payload.before,
  98. after=payload.after,
  99. filter_type=payload.filter_type,
  100. filter_patterns=payload.filter_patterns,
  101. )
  102. return {
  103. "status": "success",
  104. "message": "URLs removed successfully.",
  105. "result": result,
  106. }
  107. except Exception as e:
  108. # Handle exceptions raised by the remove function or during processing
  109. return {"status": "error", "message": str(e)}
  110. @router.post("/update", response={200: dict})
  111. def api_update(request, payload: UpdateSchema):
  112. try:
  113. result = update(
  114. resume=payload.resume,
  115. only_new=payload.only_new,
  116. index_only=payload.index_only,
  117. overwrite=payload.overwrite,
  118. before=payload.before,
  119. after=payload.after,
  120. status=payload.status,
  121. filter_type=payload.filter_type,
  122. filter_patterns=payload.filter_patterns,
  123. extractors=payload.extractors,
  124. )
  125. return {
  126. "status": "success",
  127. "message": "Archive updated successfully.",
  128. "result": result,
  129. }
  130. except Exception as e:
  131. # Handle exceptions raised by the update function or during processing
  132. return {"status": "error", "message": str(e)}
  133. @router.post("/list_all", response={200: dict})
  134. def api_list_all(request, payload: ListAllSchema):
  135. try:
  136. result = list_all(
  137. filter_patterns=payload.filter_patterns,
  138. filter_type=payload.filter_type,
  139. status=payload.status,
  140. after=payload.after,
  141. before=payload.before,
  142. sort=payload.sort,
  143. csv=payload.csv,
  144. json=payload.json,
  145. html=payload.html,
  146. with_headers=payload.with_headers,
  147. )
  148. # TODO: This is kind of bad, make the format a choice field
  149. if payload.json:
  150. return {"status": "success", "format": "json", "data": result}
  151. elif payload.html:
  152. return {"status": "success", "format": "html", "data": result}
  153. elif payload.csv:
  154. return {"status": "success", "format": "csv", "data": result}
  155. else:
  156. return {
  157. "status": "success",
  158. "message": "List generated successfully.",
  159. "data": result,
  160. }
  161. except Exception as e:
  162. # Handle exceptions raised by the list_all function or during processing
  163. return {"status": "error", "message": str(e)}