#!/usr/bin/env bash
#
# export_browser_history.sh
#
# Helper script to export browser history and bookmarks to a format ArchiveBox can ingest.
#
# Usage:
#     curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/export_browser_history.sh'
#     bash export_browser_history.sh --chrome
#     bash export_browser_history.sh --firefox
#     bash export_browser_history.sh --safari
#     ls
#         chrome_history.json
#         chrome_bookmarks.json
#         firefox_history.json
#         firefox_bookmarks.json
#         safari_history.json
#
# An optional second argument gives the path to the browser's sqlite history
# database, to be used instead of the default location.
#
# Assumptions:
#
#  * you're running this on macOS or Linux
#  * you're running a reasonably modern version of Bash
#      * macOS users: `brew install bash`
#
# Dependencies:
#
#  * sqlite3 (the command-line client; the queries use its JSON functions)
#  * jq (for chrome bookmarks)
#
  # Abort on the first failing command, and treat a pipeline as failed when
  # any of its stages fails.
  set -e
  set -o pipefail

  # The first command-line argument selects the browser. `${1?...}` makes the
  # shell stop with the given message when no argument was passed at all.
  BROWSER_TO_EXPORT="${1?Please specify --chrome, --firefox, or --safari}"

  # Every exported file is written into the directory the script was run from.
  OUTPUT_DIR="${PWD}"
  # Report whether the script is running on Linux.
  # Returns: 0 when `uname -s` prints exactly "Linux", 1 for anything else.
  is_linux() {
    case "$(uname -s)" in
      Linux) return 0 ;;
      *) return 1 ;;
    esac
  }
  # Locate the Firefox history/bookmarks database (`places.sqlite`).
  #
  # Looks inside every profile directory matching `*.default*` under the
  # platform's Firefox profile root and prints the first database found.
  #
  # Outputs: the database path on stdout; an error message on stderr when no
  #          database exists.
  # Returns: 0 when a database was found, 1 otherwise.
  find_firefox_places_db() {
    local profiles_dir candidate

    if is_linux; then
      profiles_dir=~/.mozilla/firefox
    else
      profiles_dir=~/Library/Application\ Support/Firefox/Profiles
    fi

    # Let the shell expand the glob instead of parsing `ls | head`: that
    # pipeline can be killed by SIGPIPE under `pipefail`, and only produced a
    # raw `ls` error when nothing matched. An unmatched glob stays literal and
    # is rejected by the `-f` test below.
    for candidate in "${profiles_dir}"/*.default*/places.sqlite; do
      if [[ -f "${candidate}" ]]; then
        printf '%s\n' "${candidate}"
        return 0
      fi
    done

    echo "Unable to find Firefox places database. You can supply it manually as a second parameter." >&2
    return 1
  }
  # Locate the Chrome/Chromium `History` sqlite database of the Default profile.
  #
  # Linux: checks Chromium first, then Google Chrome, below
  #        `$XDG_CONFIG_HOME` (falling back to `~/.config`).
  # Other platforms: checks the Google Chrome location below
  #        `~/Library/Application Support`.
  #
  # Outputs: the database path on stdout; an error message on stderr when no
  #          candidate exists.
  # Returns: 0 when a database was found, 1 otherwise.
  find_chrome_history_db() {
    # Everything is local so that the loop variable does not leak into (or
    # clobber) the caller's global scope.
    local config_home candidate
    local -a candidates

    if is_linux; then
      config_home="${XDG_CONFIG_HOME:-${HOME}/.config}"
      candidates=(
        "${config_home}/chromium/Default/History"
        "${config_home}/google-chrome/Default/History"
      )
    else
      candidates=(~/Library/Application\ Support/Google/Chrome/Default/History)
    fi

    for candidate in "${candidates[@]}"; do
      if [[ -f "${candidate}" ]]; then
        printf '%s\n' "${candidate}"
        return 0
      fi
    done

    echo "Unable to find Chrome history database. You can supply it manually as a second parameter." >&2
    return 1
  }
  # Export Chrome history and bookmarks as JSON into $OUTPUT_DIR.
  #
  # Globals:   OUTPUT_DIR (read) - directory that receives the output files
  # Arguments: $1 - the browser flag passed to the script (not used here)
  #            $2 - optional path to a Chrome `History` sqlite database; when
  #                 it is missing or does not exist, the database reported by
  #                 find_chrome_history_db is used instead
  # Outputs:   $OUTPUT_DIR/chrome_history.json   - one JSON array of
  #                 {timestamp, description, href} objects from the `urls` table
  #            $OUTPUT_DIR/chrome_bookmarks.json - one JSON object per entry of
  #                 `.roots.other.children` in the `Bookmarks` file that sits
  #                 next to the history database
  # Returns:   0 on success, non-zero when a step fails
  export_chrome() {
    local history_db="${2:-}"
    local tmp_db="$OUTPUT_DIR/chrome_history.db.tmp"
    local status=0

    if [[ ! -e "$history_db" ]]; then
      history_db="$(find_chrome_history_db)" || return 1
      echo "Defaulting to history db: $history_db"
      echo "Optionally specify the path to a different sqlite history database as the 2nd argument."
    fi

    # Query a copy rather than the browser's own database file.
    cp -- "$history_db" "$tmp_db" || return 1

    # group_concat() yields NULL for an empty table, and NULL poisons the whole
    # `||` concatenation; COALESCE keeps the output a valid `[]` in that case.
    sqlite3 "$tmp_db" "
      SELECT '[' || COALESCE(group_concat(
        json_object('timestamp', last_visit_time, 'description', title, 'href', url)
      ), '') || ']'
      FROM urls;" > "$OUTPUT_DIR/chrome_history.json" || status=$?

    # Remove the copy (and anything created next to it under the same prefix)
    # whether or not the query succeeded.
    rm -f -- "$OUTPUT_DIR"/chrome_history.db.*
    if (( status != 0 )); then
      return "$status"
    fi

    # Bookmarks are read from the directory of the database that was actually
    # exported, which is not necessarily the directory named by $2.
    jq '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' \
      < "$(dirname "$history_db")/Bookmarks" \
      > "$OUTPUT_DIR/chrome_bookmarks.json" || return

    echo "Chrome history exported to:"
    echo " $OUTPUT_DIR/chrome_history.json"
    echo " $OUTPUT_DIR/chrome_bookmarks.json"
  }
  # Export Firefox history and bookmarks as JSON into $OUTPUT_DIR.
  #
  # Globals:   OUTPUT_DIR (read) - directory that receives the output files
  # Arguments: $1 - the browser flag passed to the script (not used here)
  #            $2 - optional path to a Firefox `places.sqlite` database; when
  #                 it is missing or does not exist, the database reported by
  #                 find_firefox_places_db is used instead
  # Outputs:   $OUTPUT_DIR/firefox_history.json   - JSON array of
  #                 {timestamp, description, href} objects from `moz_places`
  #            $OUTPUT_DIR/firefox_bookmarks.json - JSON array of
  #                 {timestamp, description, href, tags} objects from
  #                 `moz_bookmarks` joined with `moz_places`
  # Returns:   0 on success, non-zero when a step fails
  export_firefox() {
    local places_db="${2:-}"
    local tmp_db="$OUTPUT_DIR/firefox_history.db.tmp"
    local status=0

    if [[ ! -e "$places_db" ]]; then
      places_db="$(find_firefox_places_db)" || return 1
      echo "Defaulting to history db: $places_db"
      echo "Optionally specify the path to a different sqlite history database as the 2nd argument."
    fi

    # Query a copy rather than the browser's own database file.
    cp -- "$places_db" "$tmp_db" || return 1

    # group_concat() yields NULL when there are no rows, and NULL poisons the
    # whole `||` concatenation; COALESCE keeps the output a valid `[]`.
    sqlite3 "$tmp_db" "
      SELECT
        '[' || COALESCE(group_concat(
          json_object(
            'timestamp', last_visit_date,
            'description', title,
            'href', url
          )
        ), '') || ']'
      FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json" || status=$?

    if (( status == 0 )); then
      # The recursive CTE starts at the rows with parent 0 and walks down the
      # bookmark tree, building a comma-separated `tags` string from the
      # titles met on the way. Only places whose URL contains '://' are kept.
      sqlite3 "$tmp_db" "
        with recursive tags AS (
          select id, title, '' AS tags
          FROM moz_bookmarks
          where parent == 0
          UNION ALL
          select c.id, p.title, c.title || ',' || tags AS tags
          from moz_bookmarks AS c
          JOIN tags AS p
          ON c.parent = p.id
        )
        SELECT '[' || COALESCE(group_concat(json_object('timestamp', b.dateAdded, 'description', b.title, 'href', f.url, 'tags', tags.tags)), '') || ']'
        FROM moz_bookmarks AS b
        JOIN moz_places AS f ON f.id = b.fk
        JOIN tags ON tags.id = b.parent
        WHERE f.url LIKE '%://%';" > "$OUTPUT_DIR/firefox_bookmarks.json" || status=$?
    fi

    # Remove the copy (and anything created next to it under the same prefix)
    # whether or not the queries succeeded.
    rm -f -- "$OUTPUT_DIR"/firefox_history.db.*
    if (( status != 0 )); then
      return "$status"
    fi

    echo "Firefox history exported to:"
    echo " $OUTPUT_DIR/firefox_history.json"
    echo " $OUTPUT_DIR/firefox_bookmarks.json"
  }
  # Export the URLs of the Safari history into $OUTPUT_DIR.
  #
  # Globals:   OUTPUT_DIR (read) - directory that receives the output file
  # Arguments: $1 - the browser flag passed to the script (not used here)
  #            $2 - optional path to a Safari `History.db` sqlite database;
  #                 when it is missing or does not exist,
  #                 ~/Library/Safari/History.db is used instead
  # Outputs:   $OUTPUT_DIR/safari_history.json - the `url` column of the
  #                 `history_items` table as printed by sqlite3 (one URL per
  #                 line; despite the file name this is not JSON)
  # Returns:   0 on success, non-zero when a step fails
  export_safari() {
    local history_db="${2:-}"
    local tmp_db="$OUTPUT_DIR/safari_history.db.tmp"
    local status=0

    if [[ ! -e "$history_db" ]]; then
      # Use $HOME explicitly: a tilde directly followed by a quoted string
      # (~"/Library/...") is not tilde-expanded by Bash, which left a literal
      # "~" in the path and made the copy below fail.
      history_db="${HOME}/Library/Safari/History.db"
      echo "Defaulting to history db: $history_db"
      echo "Optionally specify the path to a different sqlite history database as the 2nd argument."
    fi

    # Query a copy rather than the browser's own database file.
    cp -- "$history_db" "$tmp_db" || return 1

    sqlite3 "$tmp_db" "select url from history_items" > "$OUTPUT_DIR/safari_history.json" || status=$?

    # Remove the copy (and anything created next to it under the same prefix)
    # whether or not the query succeeded.
    rm -f -- "$OUTPUT_DIR"/safari_history.db.*
    if (( status != 0 )); then
      return "$status"
    fi

    echo "Safari history exported to:"
    echo " $OUTPUT_DIR/safari_history.json"
  }
  # Run the exporter selected by the first command-line argument, handing all
  # original arguments through so the exporter can read the optional database
  # path from $2. Any other value is rejected with exit status 1.
  case "$BROWSER_TO_EXPORT" in
    --chrome)
      export_chrome "$@"
      ;;
    --firefox)
      export_firefox "$@"
      ;;
    --safari)
      export_safari "$@"
      ;;
    *)
      echo "Unrecognized argument: $1" >&2
      exit 1
      ;;
  esac