gen_test_res.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
#!/usr/bin/env python3
# Generates files and directories in test-res used for Unicode sys tests.
# The test vectors written to data.bin, as well as the names in all_filenames,
# should correspond exactly to the sequences in UnicodeSequences.valid.
# Run with:
#   python3 gen_test_res.py
# Or:
#   python3 gen_test_res.py TEST_INVALID_UNICODE_FS
# The latter will attempt to create filenames which contain invalid Unicode
# codepoints; this does not work on some filesystems, e.g. APFS.
  11. import os
  12. import shutil
  13. import sys
  14. MODE = " ".join(sys.argv[1:])
  15. TESTDIR = "test-res"
  16. # delete previous, if any
  17. if os.path.isdir(TESTDIR):
  18. shutil.rmtree(TESTDIR)
  19. os.mkdir(TESTDIR)
  20. # Unicode test vectors
  21. all_unicode = [
  22. [0x01], # will not work on NTFS
  23. [0x7F],
  24. [0xC2, 0x80],
  25. [0xDF, 0xBF],
  26. [0xE0, 0xA0, 0x80],
  27. [0xED, 0x9F, 0xBF], # will not work on APFS
  28. [0xEE, 0x80, 0x80],
  29. [0xEF, 0xBF, 0xBD],
  30. [0xF0, 0x90, 0x80, 0x80],
  31. [0xF0, 0x9F, 0xBF, 0xBF], # will not work on APFS
  32. [0xF3, 0xBF, 0xBF, 0xBF], # will not work on APFS
  33. [0xF4, 0x80, 0x80, 0x80],
  34. [0xF4, 0x8F, 0xBF, 0xBF], # will not work on APFS
  35. [0xF0, 0x9F, 0x98, 0x82, 0xF0, 0x9F, 0x98, 0x84, 0xF0, 0x9F, 0x98, 0x99],
  36. [0xC8, 0xA7],
  37. [0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, 0xEF, 0xBC, 0x8C, 0xE3, 0x81, 0xAB,
  38. 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0x94]
  39. ]
  40. all_strings = [bytes(data).decode("utf-8") for data in all_unicode]
  41. all_filenames = all_strings[:]
  42. # Windows does not allow codepoints in the U+0000 - U+001F range
  43. # see https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file
  44. if os.name == "nt":
  45. all_filenames.remove(bytes([0x01]).decode("utf-8"))
  46. # on APFS (macOS 10.13+), filenames must consist of valid Unicode codepoints
  47. if MODE != "TEST_INVALID_UNICODE_FS":
  48. all_filenames.remove(bytes([0xED, 0x9F, 0xBF]).decode("utf-8"))
  49. all_filenames.remove(bytes([0xF0, 0x9F, 0xBF, 0xBF]).decode("utf-8"))
  50. all_filenames.remove(bytes([0xF3, 0xBF, 0xBF, 0xBF]).decode("utf-8"))
  51. all_filenames.remove(bytes([0xF4, 0x8F, 0xBF, 0xBF]).decode("utf-8"))
  52. all_binary = b""
  53. for data in all_unicode:
  54. all_binary += bytes(data) + b"\n"
  55. # generate a file with Unicode data
  56. with open(os.path.join(TESTDIR, "data.bin"), "wb") as f:
  57. f.write(all_binary)
  58. # generate sub-directories with symlinks
  59. os.mkdir(os.path.join(TESTDIR, "a"))
  60. for data in all_filenames:
  61. os.mkdir(os.path.join(TESTDIR, data))
  62. os.mkdir(os.path.join(TESTDIR, "a", data))
  63. if os.name == "nt":
  64. continue
  65. for target, name in [
  66. ("../../bin/cpp/UtilityProcess-debug", "bin-cpp-debug"),
  67. ("../../bin/cpp/UtilityProcess", "bin-cpp"),
  68. ("../../bin/cs/bin/UtilityProcess-Debug.exe", "bin-cs-debug"),
  69. ("../../bin/cs/bin/UtilityProcess.exe", "bin-cs"),
  70. ("../../bin/hl/UtilityProcess.hl", "bin-hl"),
  71. ("../../bin/lua/UtilityProcess.lua", "bin-lua"),
  72. ("../../bin/java/UtilityProcess-Debug.jar", "bin-java-debug"),
  73. ("../../bin/java/UtilityProcess.jar", "bin-java"),
  74. ("../../bin/jvm/UtilityProcess.jar", "bin-jvm"),
  75. ("../../bin/neko/UtilityProcess.n", "bin-neko"),
  76. ("../../bin/php/UtilityProcess/index.php", "bin-php"),
  77. ("../../bin/python/UtilityProcess.py", "bin-py"),
  78. ("../../src/UtilityProcess.hx", "bin-eval"),
  79. ("../../bin/js/UtilityProcess.js", "bin-js")
  80. ]:
  81. os.symlink(target, os.path.join(TESTDIR, data, name), target_is_directory=False)
  82. # files
  83. os.mkdir(os.path.join(TESTDIR, "b"))
  84. for data in all_filenames:
  85. with open(os.path.join(TESTDIR, "b", data), "wb") as f:
  86. f.write(all_binary)