# join_ctm.py
"""Join multiple binary files into a single file and generate a JSON snippet with offsets
-------------------------------------
How to use
-------------------------------------
python join_ctm.py -i "part_*.ctm" -o joined.ctm [-j offsets.js]
Will read multiple files matching the wildcard pattern (ordered lexicographically):
part_000.ctm
part_001.ctm
part_002.ctm
...
part_XXX.ctm
And generate the following output files:
joined.ctm
offsets.js (optional, offsets are also dumped to standard output)
"""
import getopt
import glob
import os
import shutil
import sys
  20. # #####################################################
  21. # Templates
  22. # #####################################################
  23. TEMPLATE_JSON = u"""\
  24. "offsets": [ %(offsets)s ],
  25. """
  26. # #############################################################################
  27. # Helpers
  28. # #############################################################################
  29. def usage():
  30. print 'Usage: %s -i "filename_*.ctm" -o filename.ctm [-j offsets.js]' % os.path.basename(sys.argv[0])
  31. # #####################################################
  32. # Main
  33. # #####################################################
  34. if __name__ == "__main__":
  35. # get parameters from the command line
  36. try:
  37. opts, args = getopt.getopt(sys.argv[1:], "hi:o:j:", ["help", "input=", "output=", "json="])
  38. except getopt.GetoptError:
  39. usage()
  40. sys.exit(2)
  41. inpattern = ""
  42. outname = ""
  43. jsonname = ""
  44. for o, a in opts:
  45. if o in ("-h", "--help"):
  46. usage()
  47. sys.exit()
  48. elif o in ("-i", "--input"):
  49. inpattern = a
  50. elif o in ("-o", "--output"):
  51. outname = a
  52. elif o in ("-j", "--json"):
  53. jsonname = a
  54. # quit if required parameters are missing
  55. if inpattern == "" or outname == "":
  56. usage()
  57. sys.exit(2)
  58. outfile = open(outname, "wb")
  59. matches = glob.glob(inpattern)
  60. matches.sort()
  61. total = 0
  62. offsets = []
  63. for filename in matches:
  64. filesize = os.path.getsize(filename)
  65. offsets.append(total)
  66. total += filesize
  67. print filename, filesize
  68. infile = open(filename, "rb")
  69. buffer = infile.read()
  70. outfile.write(buffer)
  71. infile.close()
  72. outfile.close()
  73. json_str = TEMPLATE_JSON % {
  74. "offsets" : ", ".join(["%d" % o for o in offsets])
  75. }
  76. print json_str
  77. if jsonname:
  78. jsonfile = open(jsonname, "w")
  79. jsonfile.write(json_str)
  80. jsonfile.close()