index.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. import os
  2. import json
  3. from datetime import datetime
  4. from string import Template
  5. from distutils.dir_util import copy_tree
  6. from config import (
  7. OUTPUT_DIR,
  8. TEMPLATES_DIR,
  9. OUTPUT_PERMISSIONS,
  10. ANSI,
  11. GIT_SHA,
  12. FOOTER_INFO,
  13. )
  14. from util import (
  15. chmod_file,
  16. wget_output_path,
  17. derived_link_info,
  18. pretty_path,
  19. check_link_structure,
  20. check_links_structure,
  21. )
  22. ### Homepage index for all the links
  23. def write_links_index(out_dir, links):
  24. """create index.html file for a given list of links"""
  25. check_links_structure(links)
  26. if not os.path.exists(out_dir):
  27. os.makedirs(out_dir)
  28. print('{green}[*] [{}] Updating main index files...{reset}'.format(
  29. datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
  30. **ANSI,
  31. ))
  32. write_json_links_index(out_dir, links)
  33. print(' > {}/index.json'.format(pretty_path(out_dir)))
  34. write_html_links_index(out_dir, links)
  35. print(' > {}/index.html'.format(pretty_path(out_dir)))
  36. def write_json_links_index(out_dir, links):
  37. """write the json link index to a given path"""
  38. check_links_structure(links)
  39. path = os.path.join(out_dir, 'index.json')
  40. index_json = {
  41. 'info': 'ArchiveBox Index',
  42. 'help': 'https://github.com/pirate/ArchiveBox',
  43. 'version': GIT_SHA,
  44. 'num_links': len(links),
  45. 'updated': str(datetime.now().timestamp()),
  46. 'links': links,
  47. }
  48. with open(path, 'w', encoding='utf-8') as f:
  49. json.dump(index_json, f, indent=4, default=str)
  50. chmod_file(path)
  51. def parse_json_links_index(out_dir):
  52. """load the index in a given directory and merge it with the given link"""
  53. index_path = os.path.join(out_dir, 'index.json')
  54. if os.path.exists(index_path):
  55. with open(index_path, 'r', encoding='utf-8') as f:
  56. links = json.load(f)['links']
  57. check_links_structure(links)
  58. return links
  59. return []
  60. def write_html_links_index(out_dir, links):
  61. """write the html link index to a given path"""
  62. check_links_structure(links)
  63. path = os.path.join(out_dir, 'index.html')
  64. copy_tree(os.path.join(TEMPLATES_DIR, 'static'), os.path.join(out_dir, 'static'))
  65. with open(os.path.join(out_dir, 'robots.txt'), 'w+') as f:
  66. f.write('User-agent: *\nDisallow: /')
  67. with open(os.path.join(TEMPLATES_DIR, 'index.html'), 'r', encoding='utf-8') as f:
  68. index_html = f.read()
  69. with open(os.path.join(TEMPLATES_DIR, 'index_row.html'), 'r', encoding='utf-8') as f:
  70. link_row_html = f.read()
  71. link_rows = '\n'.join(
  72. Template(link_row_html).substitute(**derived_link_info(link))
  73. for link in links
  74. )
  75. template_vars = {
  76. 'num_links': len(links),
  77. 'date_updated': datetime.now().strftime('%Y-%m-%d'),
  78. 'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
  79. 'footer_info': FOOTER_INFO,
  80. 'git_sha': GIT_SHA,
  81. 'short_git_sha': GIT_SHA[:8],
  82. 'rows': link_rows,
  83. }
  84. with open(path, 'w', encoding='utf-8') as f:
  85. f.write(Template(index_html).substitute(**template_vars))
  86. chmod_file(path)
  87. def patch_index_title_hack(link_url, new_title):
  88. """hack to update just one link's title in the link index json"""
  89. json_path = os.path.join(OUTPUT_DIR, 'index.json')
  90. links = parse_json_links_index(OUTPUT_DIR)
  91. changed = False
  92. for link in links:
  93. if link['url'] == link_url:
  94. link['title'] = new_title
  95. changed = True
  96. break
  97. if changed:
  98. write_json_links_index(OUTPUT_DIR, links)
  99. ### Individual link index
  100. def write_link_index(out_dir, link):
  101. link['updated'] = str(datetime.now().timestamp())
  102. write_json_link_index(out_dir, link)
  103. write_html_link_index(out_dir, link)
  104. def write_json_link_index(out_dir, link):
  105. """write a json file with some info about the link"""
  106. check_link_structure(link)
  107. path = os.path.join(out_dir, 'index.json')
  108. print(' √ index.json')
  109. with open(path, 'w', encoding='utf-8') as f:
  110. json.dump(link, f, indent=4, default=str)
  111. chmod_file(path)
  112. def parse_json_link_index(out_dir):
  113. """load the json link index from a given directory"""
  114. existing_index = os.path.join(out_dir, 'index.json')
  115. if os.path.exists(existing_index):
  116. with open(existing_index, 'r', encoding='utf-8') as f:
  117. link_json = json.load(f)
  118. check_link_structure(link_json)
  119. return link_json
  120. return {}
  121. def write_html_link_index(out_dir, link):
  122. check_link_structure(link)
  123. with open(os.path.join(TEMPLATES_DIR, 'link_index_fancy.html'), 'r', encoding='utf-8') as f:
  124. link_html = f.read()
  125. path = os.path.join(out_dir, 'index.html')
  126. print(' √ index.html')
  127. with open(path, 'w', encoding='utf-8') as f:
  128. f.write(Template(link_html).substitute({
  129. **link,
  130. **link['latest'],
  131. 'title': link['title'] or link['url'],
  132. 'type': link['type'] or 'website',
  133. 'tags': link['tags'] or 'untagged',
  134. 'bookmarked': datetime.fromtimestamp(float(link['timestamp'])).strftime('%Y-%m-%d %H:%M'),
  135. 'updated': datetime.fromtimestamp(float(link['updated'])).strftime('%Y-%m-%d %H:%M'),
  136. 'bookmarked_ts': link['timestamp'],
  137. 'updated_ts': link['updated'],
  138. 'archive_org': link['latest'].get('archive_org') or 'https://web.archive.org/save/{}'.format(link['url']),
  139. 'wget': link['latest'].get('wget') or wget_output_path(link),
  140. }))
  141. chmod_file(path)