dot_url_resolve.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #!/usr/bin/env python3
  2. """
  3. Resolve external image references in a Graphviz source.
  4. Nodes in a graph can have an associated image, `my_node[image="foo.png"]`, but
  5. the image string must be a path to a local file. Using a URL to point to a
  6. remotely hosted image is not supported natively. This script downloads such
  7. external references and rewrites them to local paths, enabling pipelines like:
  8. echo 'graph { a[image="https://graphviz.org/Resources/app.png"]; }' \
  9. | dot_url_resolve.py \
  10. | dot -Tpng -o my_output.png
  11. This script does not have a sophisticated understanding of the Graphviz
  12. language. It simply treats anything that looks like a string containing a URL as
  13. something that should be downloaded.
  14. """
  15. import argparse
  16. import hashlib
  17. import io
  18. import logging
  19. import re
  20. import sys
  21. import tempfile
  22. import urllib.request
  23. from pathlib import Path
  24. from typing import Dict, List, Optional, TextIO
  25. def _translate(
  26. source: str,
  27. translations: Dict[str, Path],
  28. local_store: Path,
  29. log: Optional[logging.Logger],
  30. ) -> str:
  31. """
  32. convert a remote URL to a local path, downloading if necessary
  33. If `source` is not a remote URL, it is returned as-is.
  34. Args:
  35. source: URL to resolve
  36. translations: accumulated mapping from URLs to local paths
  37. local_store: directory to write downloaded files to
  38. log: optional progress sink
  39. Returns:
  40. local path corresponding to where the URL was downloaded to
  41. """
  42. # does this look like a remote URL?
  43. if re.match(r"https?:", source, flags=re.IGNORECASE):
  44. # have we not yet downloaded this ?
  45. local = translations.get(source)
  46. if local is None:
  47. # generate a unique local filename to write to
  48. digest = hashlib.sha256(source.encode("utf-8")).hexdigest()
  49. extension = Path(source).suffix
  50. dest = local_store / f"{digest}{extension}"
  51. # download the file
  52. if log is not None:
  53. log.info(f"downloading {source} → {dest}")
  54. urllib.request.urlretrieve(source, dest)
  55. translations[source] = dest
  56. return str(translations[source])
  57. return source
  58. def resolve(
  59. inp: TextIO, outp: TextIO, local_store: Path, log: Optional[logging.Logger] = None
  60. ) -> Dict[str, Path]:
  61. """
  62. process Graphviz source, converting remote URLs to local paths
  63. Args:
  64. inp: source to read from
  65. outp: destination to write to
  66. local_store: directory to write downloaded files to
  67. log: optional progress sink
  68. Returns:
  69. a mapping from URLs discovered to paths to which they were downloaded
  70. """
  71. # translations from original URLs to local paths
  72. downloaded: Dict[str, Path] = {}
  73. in_string = False
  74. pending = io.StringIO()
  75. while True:
  76. c = inp.read(1)
  77. if in_string:
  78. # does this terminate a string we were accruing?
  79. if c in ("", '"'):
  80. accrued = pending.getvalue()
  81. pending = io.StringIO()
  82. outp.write(_translate(accrued, downloaded, local_store, log))
  83. in_string = False
  84. else:
  85. pending.write(c)
  86. continue
  87. elif not in_string and c == '"':
  88. in_string = True
  89. if c == "":
  90. break
  91. outp.write(c)
  92. return downloaded
  93. def main(args: List[str]) -> int:
  94. """
  95. entry point
  96. """
  97. # parse command line options
  98. parser = argparse.ArgumentParser(description=__doc__)
  99. parser.add_argument(
  100. "input",
  101. nargs="?",
  102. type=argparse.FileType("rt"),
  103. default=sys.stdin,
  104. help="Graphviz source to read",
  105. )
  106. parser.add_argument(
  107. "output",
  108. nargs="?",
  109. type=argparse.FileType("wt"),
  110. default=sys.stdout,
  111. help="Graphviz source to write",
  112. )
  113. parser.add_argument(
  114. "--local-dir",
  115. help="path to write resolved files to (default: temporary location)",
  116. )
  117. parser.add_argument(
  118. "--quiet", "-q", action="store_true", help="suppress progress messages"
  119. )
  120. options = parser.parse_args(args[1:])
  121. # use a temporary directory if we were not given one
  122. if options.local_dir is None:
  123. options.local_dir = Path(tempfile.mkdtemp())
  124. else:
  125. options.local_dir = Path(options.local_dir)
  126. # setup logging
  127. log = logging.getLogger()
  128. log.setLevel(logging.WARNING if options.quiet else logging.INFO)
  129. handler = logging.StreamHandler(sys.stderr)
  130. log.addHandler(handler)
  131. resolve(options.input, options.output, options.local_dir, log)
  132. return 0
  133. if __name__ == "__main__":
  134. sys.exit(main(sys.argv))