index.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. import os
  2. import json
  3. from datetime import datetime
  4. from string import Template
  5. from distutils.dir_util import copy_tree
  6. from config import (
  7. OUTPUT_DIR,
  8. TEMPLATES_DIR,
  9. ANSI,
  10. GIT_SHA,
  11. FOOTER_INFO,
  12. )
  13. from util import (
  14. chmod_file,
  15. derived_link_info,
  16. pretty_path,
  17. check_link_structure,
  18. check_links_structure,
  19. )
  20. ### Homepage index for all the links
  21. def write_links_index(out_dir, links):
  22. """create index.html file for a given list of links"""
  23. check_links_structure(links)
  24. if not os.path.exists(out_dir):
  25. os.makedirs(out_dir)
  26. print('{green}[*] [{}] Updating main index files...{reset}'.format(
  27. datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
  28. **ANSI,
  29. ))
  30. write_json_links_index(out_dir, links)
  31. print(' > {}/index.json'.format(pretty_path(out_dir)))
  32. write_html_links_index(out_dir, links)
  33. print(' > {}/index.html'.format(pretty_path(out_dir)))
  34. def write_json_links_index(out_dir, links):
  35. """write the json link index to a given path"""
  36. check_links_structure(links)
  37. path = os.path.join(out_dir, 'index.json')
  38. index_json = {
  39. 'info': 'ArchiveBox Index',
  40. 'help': 'https://github.com/pirate/ArchiveBox',
  41. 'version': GIT_SHA,
  42. 'num_links': len(links),
  43. 'updated': str(datetime.now().timestamp()),
  44. 'links': links,
  45. }
  46. with open(path, 'w', encoding='utf-8') as f:
  47. json.dump(index_json, f, indent=4, default=str)
  48. chmod_file(path)
  49. def parse_json_links_index(out_dir):
  50. """load the index in a given directory and merge it with the given link"""
  51. index_path = os.path.join(out_dir, 'index.json')
  52. if os.path.exists(index_path):
  53. with open(index_path, 'r', encoding='utf-8') as f:
  54. links = json.load(f)['links']
  55. check_links_structure(links)
  56. return links
  57. return []
  58. def write_html_links_index(out_dir, links):
  59. """write the html link index to a given path"""
  60. check_links_structure(links)
  61. path = os.path.join(out_dir, 'index.html')
  62. copy_tree(os.path.join(TEMPLATES_DIR, 'static'), os.path.join(out_dir, 'static'))
  63. with open(os.path.join(out_dir, 'robots.txt'), 'w+') as f:
  64. f.write('User-agent: *\nDisallow: /')
  65. with open(os.path.join(TEMPLATES_DIR, 'index.html'), 'r', encoding='utf-8') as f:
  66. index_html = f.read()
  67. with open(os.path.join(TEMPLATES_DIR, 'index_row.html'), 'r', encoding='utf-8') as f:
  68. link_row_html = f.read()
  69. link_rows = '\n'.join(
  70. Template(link_row_html).substitute(**derived_link_info(link))
  71. for link in links
  72. )
  73. template_vars = {
  74. 'num_links': len(links),
  75. 'date_updated': datetime.now().strftime('%Y-%m-%d'),
  76. 'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
  77. 'footer_info': FOOTER_INFO,
  78. 'git_sha': GIT_SHA,
  79. 'short_git_sha': GIT_SHA[:8],
  80. 'rows': link_rows,
  81. }
  82. with open(path, 'w', encoding='utf-8') as f:
  83. f.write(Template(index_html).substitute(**template_vars))
  84. chmod_file(path)
  85. def patch_index_title_hack(link_url, new_title):
  86. """hack to update just one link's title in the link index json"""
  87. json_path = os.path.join(OUTPUT_DIR, 'index.json')
  88. links = parse_json_links_index(OUTPUT_DIR)
  89. changed = False
  90. for link in links:
  91. if link['url'] == link_url:
  92. link['title'] = new_title
  93. changed = True
  94. break
  95. if changed:
  96. write_json_links_index(OUTPUT_DIR, links)
  97. ### Individual link index
  98. def write_link_index(out_dir, link):
  99. link['updated'] = str(datetime.now().timestamp())
  100. write_json_link_index(out_dir, link)
  101. write_html_link_index(out_dir, link)
  102. def write_json_link_index(out_dir, link):
  103. """write a json file with some info about the link"""
  104. check_link_structure(link)
  105. path = os.path.join(out_dir, 'index.json')
  106. print(' √ index.json')
  107. with open(path, 'w', encoding='utf-8') as f:
  108. json.dump(link, f, indent=4, default=str)
  109. chmod_file(path)
  110. def parse_json_link_index(out_dir):
  111. """load the json link index from a given directory"""
  112. existing_index = os.path.join(out_dir, 'index.json')
  113. if os.path.exists(existing_index):
  114. with open(existing_index, 'r', encoding='utf-8') as f:
  115. link_json = json.load(f)
  116. check_link_structure(link_json)
  117. return link_json
  118. return {}
  119. def write_html_link_index(out_dir, link):
  120. check_link_structure(link)
  121. with open(os.path.join(TEMPLATES_DIR, 'link_index.html'), 'r', encoding='utf-8') as f:
  122. link_html = f.read()
  123. path = os.path.join(out_dir, 'index.html')
  124. print(' √ index.html')
  125. with open(path, 'w', encoding='utf-8') as f:
  126. f.write(Template(link_html).substitute({
  127. **derived_link_info(link),
  128. # **link['latest'],
  129. }))
  130. chmod_file(path)