Pārlūkot izejas kodu

Unicode sys tests and fixes (#8135)

* sys Unicode test suite

* remove invalid and non-BMP codepoints from the test suite

* initial FileSystem test (readDirectory)

* exists, isDirectory, stat

* setCwd/getCwd tests, a small clean up in the test suite

* fix symlinks in test suite

* fullPath test

* test absolutePath

* fix cpp crash (tests still fail)

* secondary process used to test individual functions

* change test suite symlink

* secondary process test scaffolding, more path tests, some IO tests

* compile utility on cpp

* stdin, stderr, stdout tests via utility process

* readLine test

* posinfos, remove cpp temp fix, fix data.bin

* change all symlinks to utility process

* compile utility process on all targets

* fix stdout / stderr write tests

* move unicode sequences to separate file

* separate args tests from stdout/err tests

* print, println

* move runUtility to UtilityProcess, getEnv, putEnv, trace tests

* minor cleanup in utility process

* test Sys.environment

* test readString, readUntil

* test haxe.io.Path

* getString, getContent

* saveContent

* write, update, append

* rename, copy, deleteFile, createDirectory, deleteDirectory

* Unicode paths for File tests

* run genTestRes.sh in test setup

* spawn processes properly on eval

* add lua symlinks

* C# also resolves symlink before programPath

* remove programPath test (does not work with symlinks)

* enable sys tests for HashLink

* haxe.io.Path use Unicode iterator on UTF-16 platforms

* same but only on hl

* re-enable non-BMP, add Unicode filename originally from FileNames.hx

* add Unicode test suite files and directories to git

* remove the files (crashes AppVeyor), port bash generation script to python

* test jvm

* don't create U+0001 on Windows

* separate filenames and strings in script

* fix script

* clean up, use Path.join, temporarily run Unicode tests only

* fix Hl binary path

* sort out Hl paths

* fix cleanup, use if utf16 for Path.normalize

* expect CRLF on Windows

* except where it should be LF

* fix python println, disable symlink tests on Windows

* set sys encoding to utf-8 in python preamble

* use CRLF on php on Windows

* try chcp 65001

* run Hl on CI properly this time

* use binary mode for eval stdout

* set output encoding to UTF-8 on cs

* set output encoding on java

* move stdout encoding setting to Init class in java and jvm

* disable Unicode command line arguments on Java

* CRLF on Windows on eval

* CRLF on eval trace, disable Unicode arguments on eval on Windows

* also set stderr to UTF-8

* add test-res directory to .gitignore

* fix typo in RunCi

* typo fix in genpy

* re-enable sys tests, disable various target-specific tests due to open issues

* updated CHANGES.txt

* add HL to PATH in CI

* disable more C++ tests

* expect CRCRLF for now

* HL/C++ disable some tests on Windows

* fix conditional compilation

* disable args test on HashLink
Aurel 6 gadi atpakaļ
vecāks
revīzija
51b10b5f6a

+ 1 - 0
.gitignore

@@ -82,6 +82,7 @@ tests/unit/unit.py.res1.txt
 tests/unit/unit.py.res2.bin
 tests/sys/bin/
 /tests/sys/dump/
+/tests/sys/test-res/
 tests/optimization/dump/
 tests/misc/projects/*/*.n
 tests/misc/*/*/*.lua

+ 2 - 0
appveyor.yml

@@ -83,6 +83,8 @@ build_script:
     - haxelib setup "%HAXELIB_ROOT%"
 
 test_script:
+    # change codepage to UTF-8
+    - cmd: chcp 65001
     - cd %APPVEYOR_BUILD_FOLDER%/tests/
     - haxe -version
     - haxe RunCi.hxml

+ 1 - 0
extra/CHANGES.txt

@@ -44,6 +44,7 @@
 	all : fixed `@:allow(package)` allowing too much (#8306)
 	all : fixed various issues with startIndex handling on String.indexOf and String.lastIndexOf
 	all : fixed infinite recursion related to printing of objects with circular references (#8113)
+	sys : fixed various Unicode issues (#8135)
 	macro : fixed Array.pop handling (#8075)
 	macro : fixed assertion failure when throwing exception (#8039)
 	macro : fixed various uncatchable exceptions being thrown

+ 2 - 0
src/compiler/main.ml

@@ -467,6 +467,8 @@ and init ctx =
 	let com = ctx.com in
 	let classes = ref [([],"Std")] in
 try
+	set_binary_mode_out stdout true;
+	set_binary_mode_out stderr true;
 	let xml_out = ref None in
 	let json_out = ref None in
 	let swf_header = ref None in

+ 2 - 0
src/generators/genjava.ml

@@ -2149,6 +2149,8 @@ let generate con =
 								| _ -> ()
 				with | Not_found -> ()
 				);
+				write w "haxe.java.Init.init();";
+				newline w;
 				(match gen.gcon.main with
 					| Some(expr) ->
 						expr_s w (mk_block expr)

+ 1 - 0
src/generators/genjvm.ml

@@ -2495,6 +2495,7 @@ class tclass_to_jvm gctx c = object(self)
 				load();
 				jm#putstatic ([],"Sys") "_args" (TArray(string_sig,None))
 			end;
+			jm#invokestatic (["haxe"; "java"], "Init") "init" (method_sig [] None);
 			jm#invokestatic jc#get_this_path "main" (method_sig [] None);
 			jm#return
 

+ 4 - 1
src/generators/genpy.ml

@@ -2495,7 +2495,10 @@ module Generator = struct
 		let ctx = mk_context com in
 		Codegen.map_source_header com (fun s -> print ctx "# %s\n# coding: utf-8\n" s);
 		if has_feature ctx "closure_Array" || has_feature ctx "closure_String" then
-			spr ctx "from functools import partial as _hx_partial";
+			spr ctx "from functools import partial as _hx_partial\n";
+		spr ctx "import sys\n";
+		spr ctx "if sys.stdout.encoding != 'utf-8':\n    sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)\n";
+		spr ctx "if sys.stderr.encoding != 'utf-8':\n    sys.stderr = open(sys.stderr.fileno(), mode='w', encoding='utf8', buffering=1)\n\n";
 		gen_imports ctx;
 		gen_resources ctx;
 		gen_types ctx;

+ 7 - 3
src/macro/eval/evalStdLib.ml

@@ -1428,6 +1428,10 @@ module StdLock = struct
 	)
 end
 
+let lineEnd = match Sys.os_type with
+	| "Win32" | "Cygwin" -> "\r\n"
+	| _ -> "\n"
+
 module StdLog = struct
 	let key_fileName = hash "fileName"
 	let key_lineNumber = hash "lineNumber"
@@ -1436,7 +1440,7 @@ module StdLog = struct
 	let trace = vfun2 (fun v infos ->
 		let s = value_string v in
 		let s = match infos with
-			| VNull -> Printf.sprintf "%s\n" s
+			| VNull -> (Printf.sprintf "%s" s) ^ lineEnd
 			| _ ->  let infos = decode_object infos in
 				let file_name = decode_string (object_field infos key_fileName) in
 				let line_number = decode_int (object_field infos key_lineNumber) in
@@ -1444,7 +1448,7 @@ module StdLog = struct
 					| VArray va -> s :: (List.map value_string (EvalArray.to_list va))
 					| _ -> [s]
 				in
-				 (Printf.sprintf "%s:%i: %s\n" file_name line_number (String.concat "," l)) in
+				(Printf.sprintf "%s:%i: %s" file_name line_number (String.concat "," l)) ^ lineEnd in
 		((get_ctx()).curapi.MacroApi.get_com()).Common.print s;
 		vnull
 	)
@@ -2548,7 +2552,7 @@ module StdSys = struct
 	let println = vfun1 (fun v ->
 		let ctx = get_ctx() in
 		let com = ctx.curapi.get_com() in
-		com.print (value_string v ^ "\n");
+		com.print (value_string v ^ lineEnd);
 		vnull
 	)
 

+ 1 - 0
std/cs/Boot.hx

@@ -42,6 +42,7 @@ class Boot
 
 	@:keep public static function init():Void
 	{
+		cs.system.Console.OutputEncoding = new cs.system.text.UTF8Encoding();
 		cs.Lib.applyCultureChanges();
 	}
 

+ 5 - 0
std/haxe/io/Path.hx

@@ -230,8 +230,13 @@ class Path {
 		var acc = new StringBuf();
 		var colon = false;
 		var slashes = false;
+		#if utf16
+		for (c in haxe.iterators.StringIteratorUnicode.unicodeIterator(tmp)) {
+			switch (c) {
+		#else
 		for (i in 0...tmp.length) {
 			switch (StringTools.fastCodeAt(tmp, i)) {
+		#end
 				case ":".code:
 					acc.add(":");
 					colon = true;

+ 1 - 0
std/java/Boot.hx

@@ -25,6 +25,7 @@ import java.internal.Function;
 import java.internal.HxObject;
 import java.internal.Runtime;
 import java.Lib;
+import java.Init;
 //import java.internal.StringExt;
 import java.StdTypes;
 import Reflect;

+ 31 - 0
std/java/Init.hx

@@ -0,0 +1,31 @@
+/*
+ * Copyright (C)2005-2019 Haxe Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+package java;
+
+/**
+	Startup hook for the Java targets, exposed under the native name
+	`haxe.java.Init` and always kept in the output (`@:keep`).
+**/
+@:native("haxe.java.Init") @:keep class Init {
+	/**
+		Replaces `System.out` and `System.err` with `PrintStream` wrappers
+		constructed with the "utf-8" encoding name.
+	**/
+	public static function init():Void {
+		try {
+			java.lang.System.setOut(new java.io.PrintStream(java.lang.System.out, true, "utf-8"));
+			java.lang.System.setErr(new java.io.PrintStream(java.lang.System.err, true, "utf-8"));
+		// Best effort: an unsupported encoding is ignored and whichever
+		// streams are installed at that point stay in use.
+		} catch (e:java.io.UnsupportedEncodingException) {}
+	}
+}

+ 1 - 0
std/jvm/Jvm.hx

@@ -12,6 +12,7 @@ import jvm.annotation.EnumReflectionInformation;
 import jvm.annotation.EnumValueReflectionInformation;
 import java.lang.invoke.*;
 import java.NativeArray;
+import java.Init;
 import haxe.ds.Vector;
 import haxe.ds.Option;
 

+ 3 - 1
std/php/_std/Sys.hx

@@ -26,6 +26,8 @@ import sys.io.FileInput;
 import haxe.SysTools;
 
 @:coreApi class Sys {
+	static var lineEnd:String = Sys.systemName() == "Windows" ? "\r\n" : "\n";
+
 	/** Environment variables set by `Sys.putEnv()` */
 	static var customEnvVars = new NativeAssocArray<String>();
 
@@ -34,7 +36,7 @@ import haxe.SysTools;
 	}
 
 	public static inline function println( v : Dynamic ) : Void {
-		Global.echo(Std.string(v) + "\n");
+		Global.echo(Std.string(v) + lineEnd);
 	}
 
 	public static function args() : Array<String> {

+ 2 - 1
std/python/Lib.hx

@@ -34,6 +34,7 @@ typedef PySys = python.lib.Sys;
 **/
 class Lib {
 
+	static var lineEnd:String = Sys.systemName() == "Windows" ? "\r\n" : "\n";
 	static public var __name__(get, never):String;
 	static inline function get___name__():String return python.Syntax.code('__name__');
 
@@ -54,7 +55,7 @@ class Lib {
 	**/
 	public static inline function println(v:Dynamic):Void {
 		var str = Std.string(v);
-		printString('$str\n');
+		printString('$str$lineEnd');
 	}
 
 	/**

+ 23 - 6
tests/runci/targets/Hl.hx

@@ -6,12 +6,28 @@ import runci.System.*;
 import runci.Config.*;
 
 class Hl {
+    static var hlSrc = switch [ci, systemName] {
+      case [AppVeyor, "Windows"]: "C:\\hashlink";
+      case _: Path.join([Sys.getEnv("HOME"), "hashlink"]);
+    };
+    static var hlBuild = switch [ci, systemName] {
+      case [AppVeyor, "Windows"]: "C:\\hashlink_build";
+      case _: Path.join([Sys.getEnv("HOME"), "hashlink_build"]);
+    };
+    static var hlBinDir = switch [ci, systemName] {
+      case [AppVeyor, "Windows"]: "C:\\hashlink_build\\bin";
+      case _: Path.join([Sys.getEnv("HOME"), "hashlink_build", "bin"]);
+    };
+    static var hlBinary = switch [ci, systemName] {
+      case [AppVeyor, "Windows"]: "C:\\hashlink_build\\bin\\hl";
+      case _: Path.join([Sys.getEnv("HOME"), "hashlink_build", "bin", "hl"]);
+    };
+
     static public function getHlDependencies() {
         if (commandSucceed("hl", ["--version"])) {
             infoMsg('hl has already been installed.');
             return;
         }
-        var hlSrc = Path.join([Sys.getEnv("HOME"), "hashlink"]);
         runCommand("git", ["clone", "https://github.com/HaxeFoundation/hashlink.git", hlSrc]);
 
         switch (systemName) {
@@ -23,7 +39,6 @@ class Hl {
                 //pass
         }
 
-        var hlBuild = Path.join([Sys.getEnv("HOME"), "hashlink_build"]);
         FileSystem.createDirectory(hlBuild);
         var generator = systemName == "Windows" ? [] : ["-GNinja"];
         runCommand("cmake", generator.concat([
@@ -45,19 +60,21 @@ class Hl {
             "--build", hlBuild
         ]);
 
-        addToPATH(Path.join([hlBuild, "bin"]));
-        runCommand("hl", ["--version"]);
+        runCommand(hlBinary, ["--version"]);
+        addToPATH(hlBinDir);
     }
 
     static public function run(args:Array<String>) {
         getHlDependencies();
         runCommand("haxe", ["compile-hl.hxml"].concat(args));
-        runCommand("hl", ["bin/unit.hl"]);
+        runCommand(hlBinary, ["bin/unit.hl"]);
 
 		// changeDirectory(threadsDir);
 		// runCommand("haxe", ["build.hxml", "-hl", "export/threads.hl"]);
 		// runCommand("hl", ["export/threads.hl"]);
 
-        // TODO sys test
+        changeDirectory(sysDir);
+        runCommand("haxe", ["compile-hl.hxml"]);
+        runCommand(hlBinary, ["bin/hl/sys.hl"]);
     }
 }

+ 5 - 0
tests/sys/compile-cpp.hxml

@@ -10,4 +10,9 @@ compile-each.hxml
 --next
 compile-each.hxml
 --main ExitCode
+-cpp bin/cpp
+
+--next
+compile-each.hxml
+--main UtilityProcess
 -cpp bin/cpp

+ 5 - 0
tests/sys/compile-cs.hxml

@@ -10,4 +10,9 @@ compile-each.hxml
 --next
 compile-each.hxml
 --main ExitCode
+-cs bin/cs
+
+--next
+compile-each.hxml
+--main UtilityProcess
 -cs bin/cs

+ 6 - 1
tests/sys/compile-hl.hxml

@@ -10,4 +10,9 @@ compile-each.hxml
 --next
 compile-each.hxml
 --main ExitCode
--hl bin/hl/ExitCode.hl
+-hl bin/hl/ExitCode.hl
+
+--next
+compile-each.hxml
+--main UtilityProcess
+-hl bin/hl/UtilityProcess.hl

+ 5 - 0
tests/sys/compile-java.hxml

@@ -10,4 +10,9 @@ compile-each.hxml
 --next
 compile-each.hxml
 --main ExitCode
+-java bin/java
+
+--next
+compile-each.hxml
+--main UtilityProcess
 -java bin/java

+ 6 - 0
tests/sys/compile-jvm.hxml

@@ -13,4 +13,10 @@ compile-each.hxml
 compile-each.hxml
 --main ExitCode
 -java bin/jvm
+-D jvm
+
+--next
+compile-each.hxml
+--main UtilityProcess
+-java bin/jvm
 -D jvm

+ 5 - 0
tests/sys/compile-lua.hxml

@@ -12,3 +12,8 @@ compile-each.hxml
 compile-each.hxml
 --main ExitCode
 -lua bin/lua/ExitCode.lua
+
+--next
+compile-each.hxml
+--main UtilityProcess
+-lua bin/lua/UtilityProcess.lua

+ 5 - 0
tests/sys/compile-neko.hxml

@@ -11,3 +11,8 @@ compile-each.hxml
 # compile-each.hxml
 # -main ExitCode
 # -neko bin/neko/ExitCode.n
+
+--next
+compile-each.hxml
+--main UtilityProcess
+-neko bin/neko/UtilityProcess.n

+ 6 - 1
tests/sys/compile-php.hxml

@@ -10,4 +10,9 @@ compile-each.hxml
 --next
 compile-each.hxml
 --main ExitCode
--php bin/php/ExitCode
+-php bin/php/ExitCode
+
+--next
+compile-each.hxml
+--main UtilityProcess
+-php bin/php/UtilityProcess

+ 6 - 1
tests/sys/compile-python.hxml

@@ -10,4 +10,9 @@ compile-each.hxml
 --next
 compile-each.hxml
 --main ExitCode
--python bin/python/ExitCode.py
+-python bin/python/ExitCode.py
+
+--next
+compile-each.hxml
+--main UtilityProcess
+-python bin/python/UtilityProcess.py

+ 82 - 0
tests/sys/genTestRes.py

@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+# Generates files and directories in test-res used for Unicode sys tests.
+# The test vectors in allUnicode (written to data.bin, and used as the file and
+# directory names below) should correspond exactly to the sequences in
+# UnicodeSequences.valid.
+
+import os
+import shutil
+
+TESTDIR = "test-res"
+
+# delete previous, if any
+if os.path.isdir(TESTDIR):
+  shutil.rmtree(TESTDIR)
+
+os.mkdir(TESTDIR)
+
+# Unicode test vectors, each given as the UTF-8 bytes of one test string
+allUnicode = [
+    [0x01],
+    [0x7F],
+    [0xC2, 0x80],
+    [0xDF, 0xBF],
+    [0xE0, 0xA0, 0x80],
+    [0xED, 0x9F, 0xBF],
+    [0xEE, 0x80, 0x80],
+    [0xEF, 0xBF, 0xBD],
+    [0xF0, 0x90, 0x80, 0x80],
+    [0xF0, 0x9F, 0xBF, 0xBF],
+    [0xF3, 0xBF, 0xBF, 0xBF],
+    [0xF4, 0x80, 0x80, 0x80],
+    [0xF4, 0x8F, 0xBF, 0xBF],
+    [0xF0, 0x9F, 0x98, 0x82, 0xF0, 0x9F, 0x98, 0x84, 0xF0, 0x9F, 0x98, 0x99],
+    [0xC8, 0xA7],
+    [0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, 0xEF, 0xBC, 0x8C, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0x94]
+  ]
+
+# the same vectors decoded into strings
+allStrings = [ bytes(data).decode("utf-8") for data in allUnicode ]
+
+# names actually created on disk (a copy, so allStrings stays complete)
+allFilenames = allStrings[:]
+# Windows does not allow codepoints in the U+0000 - U+001F range
+# see https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file
+if os.name == "nt":
+  allFilenames.remove(bytes([0x01]).decode("utf-8"))
+
+# every vector followed by a single LF byte, concatenated
+allBinary = b""
+for data in allUnicode:
+  allBinary += bytes(data) + b"\n"
+
+# generate a file with Unicode data
+with open(os.path.join(TESTDIR, "data.bin"), "wb") as f:
+  f.write(allBinary)
+
+# generate sub-directories with symlinks
+# (test-res/<name> and test-res/a/<name>; symlinks are skipped on Windows)
+os.mkdir(os.path.join(TESTDIR, "a"))
+for data in allFilenames:
+  os.mkdir(os.path.join(TESTDIR, data))
+  os.mkdir(os.path.join(TESTDIR, "a", data))
+  if os.name != "nt":
+    # (link target, link name) pairs; one link per target's utility process
+    for target, name in [
+      ("../../bin/cpp/UtilityProcess-debug", "bin-cpp-debug"),
+      ("../../bin/cpp/UtilityProcess", "bin-cpp"),
+      ("../../bin/cs/bin/UtilityProcess-Debug.exe", "bin-cs-debug"),
+      ("../../bin/cs/bin/UtilityProcess.exe", "bin-cs"),
+      ("../../bin/hl/UtilityProcess.hl", "bin-hl"),
+      ("../../bin/lua/UtilityProcess.lua", "bin-lua"),
+      ("../../bin/java/UtilityProcess-Debug.jar", "bin-java-debug"),
+      ("../../bin/java/UtilityProcess.jar", "bin-java"),
+      ("../../bin/jvm/UtilityProcess-Debug.jar", "bin-jvm-debug"),
+      ("../../bin/jvm/UtilityProcess.jar", "bin-jvm"),
+      ("../../bin/neko/UtilityProcess.n", "bin-neko"),
+      ("../../bin/php/UtilityProcess/index.php", "bin-php"),
+      ("../../bin/python/UtilityProcess.py", "bin-py"),
+      ("../../src/UtilityProcess.hx", "bin-eval")
+    ]:
+      os.symlink(target, os.path.join(TESTDIR, data, name), target_is_directory = False)
+
+# files (test-res/b/<name>, each holding the same bytes as data.bin)
+os.mkdir(os.path.join(TESTDIR, "b"))
+for data in allFilenames:
+  with open(os.path.join(TESTDIR, "b", data), "wb") as f:
+    f.write(allBinary)

+ 0 - 6
tests/sys/src/FileNames.hx

@@ -17,12 +17,6 @@ class FileNames {
 		"(two words)",
 		"[two words]",
 
-		// Chinese, Japanese
-		#if !(cs || python || php || neko || cpp || java || lua || eval)
-		"中文,にほんご",
-		// this is a (Unicode) equivalent, but fails on OS X: "中文,にほんご",
-		#end
-
 		// "aaa...a"
 		[for (i in 0...100) "a"].join(""),
 	]

+ 1 - 0
tests/sys/src/Main.hx

@@ -5,6 +5,7 @@ import utest.ui.common.HeaderDisplayMode;
 class Main {
 	static public function main() {
 		var runner = new Runner();
+		runner.addCase(new TestUnicode());
 		runner.addCase(new TestSys());
 		runner.addCase(new TestFileSystem());
 		runner.addCase(new io.TestFile());

+ 422 - 0
tests/sys/src/TestUnicode.hx

@@ -0,0 +1,422 @@
+import utest.Assert;
+import haxe.io.Bytes;
+import haxe.io.Path;
+import sys.FileSystem;
+import sys.io.File;
+import UnicodeSequences.UnicodeString;
+import UnicodeSequences.codepointsToString;
+import UnicodeSequences.showUnicodeString;
+import UtilityProcess.runUtility;
+
+class TestUnicode extends utest.Test {
+	static var BIN_SYMLINK =
+#if cpp
+		#if debug
+			"bin-cpp-debug";
+		#else
+			"bin-cpp";
+		#end
+#elseif cs
+		#if debug
+			"bin-cs-debug";
+		#else
+			"bin-cs";
+		#end
+#elseif hl
+		"bin-hl";
+#elseif lua
+		"bin-lua";
+#elseif (java && jvm)
+		#if debug
+			"bin-jvm-debug";
+		#else
+			"bin-jvm";
+		#end
+#elseif java
+		#if debug
+			"bin-java-debug";
+		#else
+			"bin-java";
+		#end
+#elseif neko
+		"bin-neko";
+#elseif php
+		"bin-php";
+#elseif python
+		"bin-py";
+#elseif eval
+		"bin-eval";
+#else
+		null;
+#end
+
+	// list of filenames expected to NOT exist in sub-directories
+	// NOTE(review): not referenced by any test visible in this file - confirm it is still needed
+	static var nonExistentNames:Array<UnicodeString> = [
+		// Java escapes
+		Only([0x0025, 0x0030 , 0x0001]), // %01
+		Only([0x0025, 0x0037 , 0x0046]) // %7F
+	];
+
+	// list of expected filenames in sub-directories
+	// (on Windows the first entry of UnicodeSequences.valid is dropped)
+	static var names:Array<UnicodeString> = (Sys.systemName() == "Windows" ? UnicodeSequences.valid.slice(1) : UnicodeSequences.valid);
+
+	// extra files only present in the root test-res directory
+	static var namesRoot = names.concat([
+		Only([0x0061]), // a
+		Only([0x0062]), // b
+		Only([0x64, 0x61, 0x74, 0x61, 0x2E, 0x62, 0x69, 0x6E]) // data.bin
+	]);
+
+	// line terminator expected in utility process output: CRLF on Windows, LF elsewhere
+	static var endLine = (Sys.systemName() == "Windows" ? "\r\n" : "\n");
+
+	/**
+		Asserts that `actual` contains the same names as `expected`:
+		same length, but possibly in a different order.
+		Assumes there are no duplicates in `expected`.
+
+		Each entry of `actual` is converted to codepoints and must match one
+		of the not-yet-matched entries of `expected`; otherwise the test fails
+		with the offending filename.
+	**/
+	function sameFiles(actual:Array<String>, expected:Array<UnicodeString>):Void {
+		// utest's Assert.equals takes the expected value first
+		Assert.equals(expected.length, actual.length);
+		var remaining = expected.copy();
+		for (file in actual) {
+			var codepoints = UnicodeSequences.unicodeCodepoints(file);
+			// drop every reference sequence that this filename matches
+			var removed = remaining.filter(ref -> !UnicodeSequences.codepointsSame(codepoints, ref));
+			if (removed.length == remaining.length) {
+				// nothing was dropped, so the filename matched no expected entry
+				Assert.fail('unexpected filename ${showUnicodeString(file)} found');
+			} else {
+				remaining = removed;
+			}
+		}
+	}
+
+	/**
+		Asserts that `actual` ends with `expected` or, when given, with `alt`.
+		The failure message shows all involved strings via `showUnicodeString`.
+	**/
+	function assertUEnds(actual:String, expected:String, ?alt:String, ?pos:haxe.PosInfos):Void {
+		var matches = StringTools.endsWith(actual, expected);
+		// the alternative suffix is only consulted when the primary one did not match
+		if (!matches && alt != null) matches = StringTools.endsWith(actual, alt);
+		var message = 'expected ${showUnicodeString(actual)} to end with ${showUnicodeString(expected)}';
+		if (alt != null) message += ' or ${showUnicodeString(alt)}';
+		Assert.isTrue(matches, message, pos);
+	}
+
+	/**
+		Asserts that `actual` equals `expected`. Unless `msg` is given, the
+		failure message shows both strings via `showUnicodeString`.
+	**/
+	function assertUEquals(actual:String, expected:String, ?msg:String, ?pos:haxe.PosInfos):Void {
+		var message = msg;
+		if (message == null) {
+			message = 'expected ${showUnicodeString(actual)} to be ${showUnicodeString(expected)}';
+		}
+		Assert.equals(expected, actual, message, pos);
+	}
+
+	/**
+		Asserts that `actual` and `expected` hold the same bytes, i.e. that
+		`actual.compare(expected)` is 0. Unless `msg` is given, the failure
+		message shows both buffers as hex.
+	**/
+	function assertBytesEqual(actual:Bytes, expected:Bytes, ?msg:String, ?pos:haxe.PosInfos):Void {
+		Assert.equals(
+			// expected value first, consistent with assertUEquals
+			0, actual.compare(expected),
+			msg != null ? msg : 'expected ${actual.toHex()} to be ${expected.toHex()}',
+			pos
+		);
+	}
+
+	/**
+		Calls `f` once for every usable form of every entry in `names`.
+
+		`Only` entries yield a single string. `Normal` entries yield their NFC
+		and NFD forms; with `skipNonExistent` (the default) a form is only
+		passed to `f` if `FileSystem.exists` reports it.
+
+		When `path` is given, each string is joined onto it with `Path.join`
+		before the existence check and before being passed to `f`.
+	**/
+	function pathBoth(f:String->Void, ?path:String, ?skipNonExistent:Bool = true):Void {
+		for (filename in names) switch (filename) {
+			case Only(codepointsToString(_) => ref):
+			f(path != null ? Path.join([path, ref]) : ref);
+			case Normal(codepointsToString(_) => nfc, codepointsToString(_) => nfd):
+			var joinedNfc = path != null ? Path.join([path, nfc]) : nfc;
+			var joinedNfd = path != null ? Path.join([path, nfd]) : nfd;
+			if (!skipNonExistent || FileSystem.exists(joinedNfc)) f(joinedNfc);
+			if (!skipNonExistent || FileSystem.exists(joinedNfd)) f(joinedNfd);
+		}
+	}
+
+	/**
+		Asserts, for every entry in `names`, that `f` returns true for the
+		entry joined onto `path`. For `Normal` entries it is enough that
+		either the NFC or the NFD form satisfies `f`; the NFD form is only
+		tried when the NFC form fails.
+	**/
+	function assertNormalEither(f:String->Bool, path:String, ?msg:String, ?pos:haxe.PosInfos):Void {
+		for (filename in names) Assert.isTrue(switch (filename) {
+			case Only(codepointsToString(_) => ref): f(Path.join([path, ref]));
+			case Normal(codepointsToString(_) => nfc, codepointsToString(_) => nfd):
+			f(Path.join([path, nfc])) || f(Path.join([path, nfd]));
+		}, '$msg ($filename in $path)', pos);
+	}
+
+	/**
+		Creates the scratch directory `temp-unicode` and runs `genTestRes.py`,
+		the script that generates the `test-res` fixtures.
+	**/
+	function setupClass() {
+		FileSystem.createDirectory("temp-unicode");
+		var exitCode = Sys.command("python3", ["genTestRes.py"]);
+		if (exitCode != 0) {
+			// Do not abort: only report, so that a failing generator is visible
+			// instead of showing up as unrelated assertion failures later on.
+			Sys.stderr().writeString('genTestRes.py exited with code $exitCode, test-res may be missing or stale\n');
+		}
+	}
+
+	/**
+		Removes the scratch directory `temp-unicode` together with the files
+		directly inside it, if the directory exists.
+	**/
+	function teardownClass() {
+		var tempDir = "temp-unicode";
+		if (!FileSystem.exists(tempDir)) return;
+		for (entry in FileSystem.readDirectory(tempDir)) {
+			FileSystem.deleteFile(Path.join([tempDir, entry]));
+		}
+		FileSystem.deleteDirectory(tempDir);
+	}
+
+#if target.unicode
+	/**
+		Exercises `Sys.setCwd`/`getCwd`, `sys.FileSystem`, `haxe.io.Path` and
+		`File.copy` with the Unicode names from `names`, using the fixtures in
+		`test-res` and the scratch directory `temp-unicode`.
+		Sections are disabled per target where noted.
+	**/
+	function testFilesystem() {
+#if !java
+#if !(cpp || cs) // C++ disabled temporarily (#8400), C# disabled temporarily (#8247)
+		// setCwd + getCwd
+		Sys.setCwd("test-res");
+		function enterLeave(dir:String, ?alt:String):Void {
+			Sys.setCwd(dir);
+			assertUEnds(Path.removeTrailingSlashes(Path.normalize(Sys.getCwd())), '/test-res/${dir}', alt != null ? '/test-res/${alt}' : null);
+			Sys.setCwd("..");
+		}
+		for (filename in names) switch (filename) {
+			case Only(codepointsToString(_) => ref): enterLeave(ref);
+			case Normal(codepointsToString(_) => nfc, codepointsToString(_) => nfd):
+			if (FileSystem.exists(nfc)) enterLeave(nfc, nfd);
+			if (FileSystem.exists(nfd)) enterLeave(nfd, nfc);
+		}
+		Sys.setCwd("..");
+#end
+#end
+
+		// absolutePath
+		pathBoth(path -> {
+				for (relative in [
+						{path: '../$path', end: '${path}'},
+						{path: 'foo', end: '${path}/foo'},
+						{path: "..", end: "test-res"},
+						{path: "./././././", end: path},
+						{path: "./..", end: "test-res"},
+						{path: "./čýžé", end: '${path}/čýžé'},
+						{path: "./čýžé/", end: '${path}/čýžé'},
+						{path: "./../čýžé", end: 'test-res/čýžé'},
+						{path: "./../čýžé/", end: 'test-res/čýžé'},
+					]) assertUEnds(
+						Path.normalize(FileSystem.absolutePath('$path/${relative.path}')),
+						relative.end
+					);
+			}, "test-res");
+
+#if !java
+#if !(cpp || cs) // C++ disabled temporarily (#8400), C# disabled temporarily (#8247)
+		assertNormalEither(path -> {
+				if (!FileSystem.exists(path)) return false; // NFC/NFD differences
+				Sys.setCwd(path);
+				var ret = true;
+				for (relative in [
+						{path: '../$path', end: '${path}'},
+						{path: 'foo', end: '${path}/foo'},
+						{path: "..", end: "test-res"},
+						{path: "./././././", end: path},
+						{path: "./..", end: "test-res"},
+						{path: "./čýžé", end: '${path}/čýžé'},
+						{path: "./čýžé/", end: '${path}/čýžé'},
+						{path: "./../čýžé", end: 'test-res/čýžé'},
+						{path: "./../čýžé/", end: 'test-res/čýžé'},
+					]) if (!StringTools.endsWith(Path.normalize(FileSystem.absolutePath('${relative.path}')), relative.end)) ret = false;
+				Sys.setCwd("../..");
+				return ret;
+			}, "test-res", "setCwd + absolutePath + endsWith failed");
+#end
+#end
+
+		// exists
+#if !cpp // C++ disabled temporarily (#8400)
+		assertNormalEither(FileSystem.exists, 'test-res/a', 'expected exists == true');
+		// the files in test-res/b must exist as well, hence "== true"
+		assertNormalEither(FileSystem.exists, 'test-res/b', 'expected exists == true');
+#end
+
+		// fullPath
+#if !lua // Lua disabled temporarily (#8215)
+		#if !cs // C# behaves like Windows here
+		if (Sys.systemName() != "Windows") {
+			// symlinks behave strangely on Windows
+			pathBoth(path -> {
+					assertUEnds(
+							Path.normalize(FileSystem.fullPath('$path/${BIN_SYMLINK}')),
+							'/${UtilityProcess.BIN_PATH}/${UtilityProcess.BIN_NAME}'
+						);
+				}, "test-res");
+		}
+		#end
+#end
+
+		// isDirectory
+		assertNormalEither(FileSystem.isDirectory, 'test-res/a', 'expected isDirectory == true');
+		assertNormalEither(path -> !FileSystem.isDirectory(path), 'test-res/b', 'expected isDirectory == false');
+
+		// readDirectory
+#if !cs // C# disabled temporarily (#8247)
+		sameFiles(FileSystem.readDirectory("test-res"), namesRoot);
+		sameFiles(FileSystem.readDirectory("test-res/a"), names);
+		sameFiles(FileSystem.readDirectory("test-res/b"), names);
+#end
+
+		// stat
+		assertNormalEither(path -> FileSystem.stat(path) != null, 'test-res/a', 'expected stat != null');
+		assertNormalEither(path -> FileSystem.stat(path) != null, 'test-res/b', 'expected stat != null');
+
+		// path (Assert.equals takes the expected value first)
+		pathBoth(str -> {
+				Assert.equals(str, new Path('$str/a.b').dir);
+				Assert.equals(str, Path.directory('$str/a.b'));
+				Assert.equals(str, new Path('a/$str.b').file);
+				Assert.equals(str, new Path('a/b.$str').ext);
+				Assert.equals(str, Path.extension('a/b.$str'));
+				Assert.equals('$str/a', Path.join([str, "a"]));
+				Assert.equals('a/$str', Path.join(["a", str]));
+				Assert.equals('$str/', Path.addTrailingSlash(str));
+				Assert.equals(str, Path.normalize('a/../$str'));
+				Assert.equals(str, Path.normalize('$str/a/..'));
+			});
+
+		// rename
+#if !cpp // C++ disabled temporarily (#8400)
+		File.copy("test-res/data.bin", "temp-unicode/rename-me");
+		pathBoth(str -> {
+				FileSystem.rename('temp-unicode/rename-me', 'temp-unicode/$str');
+				Assert.isFalse(FileSystem.exists('temp-unicode/rename-me'));
+				Assert.isTrue(FileSystem.exists('temp-unicode/$str'));
+				// rename back so the next name starts from the same file
+				FileSystem.rename('temp-unicode/$str', 'temp-unicode/rename-me');
+			});
+#end
+
+#if !cpp // C++ disabled temporarily (#8400)
+		pathBoth(str -> {
+				// copy
+				File.copy("test-res/data.bin", 'temp-unicode/$str');
+				Assert.isTrue(FileSystem.exists('temp-unicode/$str'));
+				assertBytesEqual(File.getBytes('temp-unicode/$str'), UnicodeSequences.validBytes);
+
+				// deleteFile
+				FileSystem.deleteFile('temp-unicode/$str');
+				Assert.isFalse(FileSystem.exists('temp-unicode/$str'));
+
+				// createDirectory
+				FileSystem.createDirectory('temp-unicode/$str');
+				Assert.isTrue(FileSystem.exists('temp-unicode/$str'));
+				Assert.equals(0, FileSystem.readDirectory('temp-unicode/$str').length);
+
+				// deleteDirectory
+				FileSystem.deleteDirectory('temp-unicode/$str');
+				Assert.isFalse(FileSystem.exists('temp-unicode/$str'));
+			});
+#end
+	}
+
+	/**
+		Runs the utility process via `runUtility` and checks that Unicode
+		strings survive the round trip through stdin, stdout, stderr, print,
+		println, trace, environment variables and command line arguments.
+		Output that ends with a line terminator is compared against `endLine`.
+	**/
+	function testIPC() {
+		// stdin.readLine
+		UnicodeSequences.normalBoth(str -> {
+				assertUEquals(runUtility(["stdin.readLine"], {stdin: str + endLine}).stdout, str + endLine);
+			});
+
+		// stdin.readString
+		UnicodeSequences.normalBoth(str -> {
+				// the length is passed in bytes, not in characters
+				var byteLength = Bytes.ofString(str).length;
+				assertUEquals(runUtility(["stdin.readString", '${byteLength}'], {stdin: '$str'}).stdout, str + endLine);
+			});
+
+		// stdin.readUntil
+		UnicodeSequences.normalBoth(str -> {
+				// make sure the 0x70 byte is not part of the test string
+				assertUEquals(runUtility(["stdin.readUntil", "0x70"], {stdin: str + "\x70" + str + "\x70"}).stdout, str + endLine);
+			});
+
+		// here the string is selected by index and normalization mode
+		// instead of being passed to the utility process directly
+		UnicodeSequences.normalBothIndexed((str, i, nfc) -> {
+				var mode = nfc ? "nfc" : "nfd";
+				// stdout
+				assertUEquals(runUtility(["stdout.writeString", '$i', mode]).stdout, str);
+				// stderr
+				assertUEquals(runUtility(["stderr.writeString", '$i', mode]).stderr, str);
+				// print
+				assertUEquals(runUtility(["print", '$i', mode]).stdout, str);
+				// println
+				assertUEquals(runUtility(["println", '$i', mode]).stdout, str + endLine);
+				// trace
+				assertUEnds(runUtility(["trace", '$i', mode]).stdout, str + endLine);
+				#if !java
+#if (hl || cpp) if (Sys.systemName() != "Windows") { #end // HL and C++ temporarily disabled (#8379)
+				// putEnv + getEnv
+				assertUEquals(runUtility(["putEnv", "HAXE_TEST", '$i', mode, "getEnv", "HAXE_TEST"]).stdout, str + endLine);
+#if !lua // Lua disabled temporarily (#8216)
+				// putEnv + environment
+				assertUEquals(runUtility(["putEnv", "HAXE_TEST", '$i', mode, "environment", "HAXE_TEST"]).stdout, str + endLine);
+#end
+#if (hl || cpp) } #end // HL and C++ temporarily disabled (#8379)
+				#end
+			});
+
+		// args
+		#if !cs // C# behaves like Windows here
+		// on java, eval and hl the args test is skipped on Windows
+		if (#if (java || eval || hl) Sys.systemName() != "Windows" #else true #end) {
+			// https://stackoverflow.com/questions/7660651/passing-command-line-unicode-argument-to-java-code
+			UnicodeSequences.normalBoth(str -> {
+					assertUEquals(runUtility(["args", str]).stdout, str + endLine);
+				});
+		}
+		#end
+	}
+
+	/**
+		Exercises `sys.io.File` reading and writing with Unicode content and
+		Unicode filenames: getBytes, getContent, saveContent, write, update,
+		append, and the readLine / readString / readUntil input methods.
+		Reads fixtures from `test-res` and writes into `temp-unicode`.
+	**/
+	function testIO() {
+		// getBytes
+		assertBytesEqual(File.getBytes("test-res/data.bin"), UnicodeSequences.validBytes);
+		pathBoth(path -> {
+				assertBytesEqual(File.getBytes(path), UnicodeSequences.validBytes);
+			}, "test-res/b");
+
+		// getContent
+		assertUEquals(File.getContent("test-res/data.bin"), UnicodeSequences.validString);
+		pathBoth(path -> {
+				assertUEquals(File.getContent(path), UnicodeSequences.validString);
+			}, "test-res/b");
+
+		// saveContent
+		File.saveContent("temp-unicode/data.bin", UnicodeSequences.validString);
+		assertBytesEqual(File.getBytes("temp-unicode/data.bin"), UnicodeSequences.validBytes);
+#if !cs // C# disabled temporarily (#8247)
+		pathBoth(str -> {
+				File.saveContent('temp-unicode/saveContent-$str.bin', UnicodeSequences.validString);
+				assertBytesEqual(File.getBytes('temp-unicode/saveContent-$str.bin'), UnicodeSequences.validBytes);
+			});
+#end
+
+		// write
+		var out = File.write("temp-unicode/out.bin");
+		out.writeString(UnicodeSequences.validString);
+		out.close();
+		assertBytesEqual(File.getBytes("temp-unicode/out.bin"), UnicodeSequences.validBytes);
+#if !cs // C# disabled temporarily (#8247)
+		pathBoth(str -> {
+				var out = File.write('temp-unicode/write-$str.bin');
+				out.writeString(UnicodeSequences.validString);
+				out.close();
+				assertBytesEqual(File.getBytes('temp-unicode/write-$str.bin'), UnicodeSequences.validBytes);
+			});
+#end
+
+		// update (overwrites the same content in place, so the file is unchanged)
+		var out = File.update("temp-unicode/out.bin");
+		out.seek(0, SeekBegin);
+		out.writeString(UnicodeSequences.validString);
+		out.close();
+		assertBytesEqual(File.getBytes("temp-unicode/out.bin"), UnicodeSequences.validBytes);
+
+		// append (the file must now hold the content twice)
+		var out = File.append("temp-unicode/out.bin");
+		out.writeString(UnicodeSequences.validString);
+		out.close();
+		var repeated = Bytes.alloc(UnicodeSequences.validBytes.length * 2);
+		repeated.blit(0, UnicodeSequences.validBytes, 0, UnicodeSequences.validBytes.length);
+		repeated.blit(UnicodeSequences.validBytes.length, UnicodeSequences.validBytes, 0, UnicodeSequences.validBytes.length);
+		assertBytesEqual(File.getBytes("temp-unicode/out.bin"), repeated);
+
+		// readLine
+		var data = File.read("test-res/data.bin");
+		UnicodeSequences.normalNFC(str -> {
+				var line = data.readLine();
+				assertUEquals(line, str);
+			});
+
+		// readString
+		data.seek(0, SeekBegin);
+		UnicodeSequences.normalNFC(str -> {
+				var byteLength = Bytes.ofString(str).length;
+				var line = data.readString(byteLength + 1); // + newline character
+				assertUEquals(line, '$str\n');
+			});
+
+		// readUntil
+		data.seek(0, SeekBegin);
+		UnicodeSequences.normalNFC(str -> {
+				var line = data.readUntil(0x0A);
+				assertUEquals(line, str);
+			});
+
+		// release the input handle; it was previously left open
+		data.close();
+	}
+#end
+}

+ 139 - 0
tests/sys/src/UnicodeSequences.hx

@@ -0,0 +1,139 @@
+using haxe.iterators.StringIteratorUnicode;
+
+/**
+	A test sequence described as an array of Unicode codepoints.
+ */
+enum UnicodeString {
+	// a sequence that has a single codepoint representation
+	Only(ref:Array<Int>);
+	// a sequence given in two equivalent representations (NFC and NFD forms)
+	Normal(nfc:Array<Int>, nfd:Array<Int>);
+}
+
+/**
+	Unicode fixtures for the sys tests, together with helpers that convert
+	between codepoint arrays and strings.
+ */
+class UnicodeSequences {
+	// boundary conditions: codepoints at or near the edges of each UTF-8 encoded length
+	public static var boundary:Array<UnicodeString> = [
+		// 1 byte
+		Only([0x0001]), // this must be first, see TestUnicode.names
+		Only([0x007F]),
+		// 2 byte
+		Only([0x0080]),
+		Only([0x07FF]),
+		// 3 byte
+		Only([0x0800]),
+		Only([0xD7FF]), // just before surrogates
+		Only([0xE000]), // just after surrogates
+		Only([0xFFFD]),
+		// non-BMP (4 byte)
+		Only([0x10000]),
+		Only([0x1FFFF]),
+		Only([0xFFFFF]),
+		Only([0x100000]),
+		Only([0x10FFFF])
+	];
+
+	// NFC / NFD pairs: the same text in composed and decomposed form
+	public static var normal:Array<UnicodeString> = [
+		Normal([0x0227], [0x0061, 0x0307]),
+		Normal([0x4E2D, 0x6587, 0xFF0C, 0x306B, 0x307B, 0x3093, 0x3054], [0x4E2D, 0x6587, 0xFF0C, 0x306B, 0x307B, 0x3093, 0x3053, 0x3099])
+	];
+
+	// valid sequences: the boundary cases, then one emoji run, then the NFC / NFD pairs
+	public static var valid:Array<UnicodeString> =
+		boundary
+		.concat([Only([0x1F602, 0x1F604, 0x1F619])]) // important (non-BMP) emoji
+		.concat(normal);
+
+	// UTF-8 bytes of every entry of `valid` (NFC form for the pairs),
+	// each one followed by a line feed (0x0A)
+	public static var validBytes = haxe.io.Bytes.ofHex(
+			"010A" +
+			"7F0A" +
+			"C2800A" +
+			"DFBF0A" +
+			"E0A0800A" +
+			"ED9FBF0A" +
+			"EE80800A" +
+			"EFBFBD0A" +
+			"F09080800A" +
+			"F09FBFBF0A" +
+			"F3BFBFBF0A" +
+			"F48080800A" +
+			"F48FBFBF0A" +
+			"F09F9882F09F9884F09F98990A" +
+			"C8A70A" +
+			"E4B8ADE69687EFBC8CE381ABE381BBE38293E381940A"
+		);
+
+	// the same lines as `validBytes`, written as a string literal
+	public static var validString =
+		"\u0001\n" +
+		"\u007F\n" +
+		"\u0080\n" +
+		"\u07FF\n" +
+		"\u0800\n" +
+		"\uD7FF\n" +
+		"\uE000\n" +
+		"\uFFFD\n" +
+		"\u{10000}\n" +
+		"\u{1FFFF}\n" +
+		"\u{FFFFF}\n" +
+		"\u{100000}\n" +
+		"\u{10FFFF}\n" +
+		"\u{1F602}\u{1F604}\u{1F619}\n" +
+		"\u0227\n" +
+		"\u4E2D\u6587\uFF0C\u306B\u307B\u3093\u3054\n";
+
+	// invalid sequences
+	public static var invalid:Array<UnicodeString> = [
+		Only([0xFFFE]),
+		Only([0xFFFF])
+	];
+
+	// utility methods
+
+	/**
+		Collects the codepoints of `str`, in order, using the Unicode iterator.
+	 */
+	public static function unicodeCodepoints(str:String):Array<Int> {
+		var points:Array<Int> = [];
+		for (cp in str.unicodeIterator()) points.push(cp);
+		return points;
+	}
+
+	/**
+		Builds a string by converting every codepoint in `ref` with
+		`String.fromCharCode` and concatenating the pieces.
+	 */
+	public static function codepointsToString(ref:Array<Int>):String {
+		var pieces = ref.map(cp -> String.fromCharCode(cp));
+		return pieces.join("");
+	}
+
+	/**
+		Formats `str` followed by its codepoint array in parentheses.
+	 */
+	public static function showUnicodeString(str:String):String {
+		var points = unicodeCodepoints(str);
+		return '$str ($points)';
+	}
+
+	/**
+		Tells whether `actual` matches `expected` element by element.
+		For a `Normal` entry either the NFC or the NFD form is accepted.
+	 */
+	public static function codepointsSame(actual:Array<Int>, expected:UnicodeString):Bool {
+		// element-wise comparison of `actual` against one candidate array
+		function matches(candidate:Array<Int>):Bool {
+			if (actual.length != candidate.length) return false;
+			var idx = 0;
+			while (idx < actual.length) {
+				if (actual[idx] != candidate[idx]) return false;
+				idx++;
+			}
+			return true;
+		}
+		return switch (expected) {
+			case Only(ref):
+				matches(ref);
+			case Normal(nfc, nfd):
+				// it might eventually be best to expect a particular form
+				// on specific targets + platforms + filesystems
+				// for now, allowing either
+				matches(nfc) || matches(nfd);
+		};
+	}
+
+	/**
+		Calls `f` once per entry of `valid`, passing the entry as a string.
+		`Normal` entries are passed in NFC form only.
+	 */
+	public static function normalNFC(f:String->Void):Void {
+		for (entry in valid) {
+			var points = switch (entry) {
+				case Only(ref): ref;
+				case Normal(nfc, _): nfc;
+			};
+			f(codepointsToString(points));
+		}
+	}
+
+	/**
+		Calls `f` for every entry of `valid`, passing the entry as a string.
+		`Normal` entries produce two calls: NFC form first, then NFD form.
+	 */
+	public static function normalBoth(f:String->Void):Void {
+		for (entry in valid) {
+			switch (entry) {
+				case Only(ref):
+					f(codepointsToString(ref));
+				case Normal(nfc, nfd):
+					// both forms are converted before the first callback runs
+					var composed = codepointsToString(nfc);
+					var decomposed = codepointsToString(nfd);
+					f(composed);
+					f(decomposed);
+			}
+		}
+	}
+
+	/**
+		Like `normalBoth`, but `f` also receives the entry's index in `valid`
+		and a flag that is `true` only for the NFC form of a `Normal` entry.
+	 */
+	public static function normalBothIndexed(f:String->Int->Bool->Void):Void {
+		for (index in 0...valid.length) {
+			switch (valid[index]) {
+				case Only(ref):
+					f(codepointsToString(ref), index, false);
+				case Normal(nfc, nfd):
+					// both forms are converted before the first callback runs
+					var composed = codepointsToString(nfc);
+					var decomposed = codepointsToString(nfd);
+					f(composed, index, true);
+					f(decomposed, index, false);
+			}
+		}
+	}
+}

+ 152 - 0
tests/sys/src/UtilityProcess.hx

@@ -0,0 +1,152 @@
+/**
+	Used by TestUnicode.
+	Runs a given simple program based on the first argument.
+ */
+
+import haxe.io.Path;
+import sys.io.Process;
+
+class UtilityProcess {
+	/**
+		Directory that holds the utility program for the current target,
+		or `null` on targets not listed here.
+	 */
+	public static var BIN_PATH =
+#if cpp
+		Path.join(["bin", "cpp"]);
+#elseif cs
+		Path.join(["bin", "cs", "bin"]);
+#elseif hl
+		Path.join(["bin", "hl"]);
+#elseif lua
+		Path.join(["bin", "lua"]);
+#elseif (java && jvm)
+		Path.join(["bin", "jvm"]);
+#elseif java
+		Path.join(["bin", "java"]);
+#elseif neko
+		Path.join(["bin", "neko"]);
+#elseif php
+		Path.join(["bin", "php"]);
+#elseif python
+		Path.join(["bin", "python"]);
+#elseif eval
+		Path.join(["src"]);
+#else
+		null;
+#end
+	/**
+		File name of the utility program for the current target (relative to
+		`BIN_PATH`), or `null` on targets not listed here.
+	 */
+	public static var BIN_NAME =
+#if cpp
+		#if debug
+			"UtilityProcess-debug";
+		#else
+			"UtilityProcess";
+		#end
+#elseif cs
+		#if debug
+			"UtilityProcess-Debug.exe";
+		#else
+			"UtilityProcess.exe";
+		#end
+#elseif hl
+		"UtilityProcess.hl";
+#elseif lua
+		"UtilityProcess.lua";
+#elseif java
+		#if debug
+			"UtilityProcess-Debug.jar";
+		#else
+			"UtilityProcess.jar";
+		#end
+#elseif neko
+		"UtilityProcess.n";
+#elseif php
+		Path.join(["UtilityProcess", "index.php"]);
+#elseif python
+		"UtilityProcess.py";
+#elseif eval
+		"UtilityProcess.hx";
+#else
+		null;
+#end
+
+	/**
+		Spawns the utility program for the current target, optionally feeds
+		its standard input, and collects its output.
+
+		@param args Command-line arguments passed to the utility program.
+		@param options Optional settings. `stdin` is written to the child's
+		       standard input; `execPath` and `execName` override `BIN_PATH`
+		       and `BIN_NAME`. The object itself is not modified.
+		@return The child's exit code and everything it wrote to stdout and stderr.
+	 */
+	public static function runUtility(args:Array<String>, ?options:{?stdin:String, ?execPath:String, ?execName:String}):{
+		exitCode:Int,
+		stdout:String,
+		stderr:String
+	} {
+		if (options == null) options = {};
+		// resolve defaults into locals so the caller's options object is left untouched
+		var execPath = options.execPath != null ? options.execPath : BIN_PATH;
+		var execName = options.execName != null ? options.execName : BIN_NAME;
+		var execFull = Path.join([execPath, execName]);
+		// each target needs its own launcher (VM, interpreter or the binary itself)
+		var proc =
+		#if (macro || interp)
+		new Process("haxe", ["compile-each.hxml", "-p", execPath, "--run", execName].concat(args));
+		#elseif cpp
+		new Process(execFull, args);
+		#elseif cs
+		(switch (Sys.systemName()) {
+			case "Windows":
+				new Process(execFull, args);
+			case _:
+				new Process("mono", [execFull].concat(args));
+		});
+		#elseif java
+		new Process(Path.join([java.lang.System.getProperty("java.home"), "bin", "java"]), ["-jar", execFull].concat(args));
+		#elseif python
+		new Process(python.lib.Sys.executable, [execFull].concat(args));
+		#elseif neko
+		new Process("neko", [execFull].concat(args));
+		#elseif hl
+		new Process("hl", [execFull].concat(args));
+		#elseif php
+		new Process(php.Global.defined('PHP_BINARY') ? php.Const.PHP_BINARY : 'php', [execFull].concat(args));
+		#elseif lua
+		new Process("lua", [execFull].concat(args));
+		#else
+		null;
+		#end
+		if (options.stdin != null) {
+			proc.stdin.writeString(options.stdin);
+			proc.stdin.flush();
+		}
+		// Drain both pipes before waiting for the exit code: a child that fills
+		// a pipe buffer blocks until the parent reads it, so waiting first can
+		// hang forever.
+		// NOTE: stdout is drained before stderr, so a child that writes a very
+		// large amount to stderr while stdout is still open can still stall.
+		var stdout = proc.stdout.readAll().toString();
+		var stderr = proc.stderr.readAll().toString();
+		var exitCode = proc.exitCode();
+		proc.close();
+		return {
+			exitCode: exitCode,
+			stdout: stdout,
+			stderr: stderr
+		};
+	}
+
+	/**
+		Entry point of the utility program. The command-line arguments select
+		one small action (see the cases below); unknown arguments do nothing.
+	 */
+	public static function main():Void {
+		var args = Sys.args();
+		// maps an index into UnicodeSequences.valid (given as a decimal string)
+		// to the string for that entry; `mode` picks NFC ("nfc") or NFD (anything else)
+		function sequenceIndex(d:String, mode:String):String return (switch (UnicodeSequences.valid[Std.parseInt(d)]) {
+				case Only(ref): UnicodeSequences.codepointsToString(ref);
+				case Normal(nfc, nfd): UnicodeSequences.codepointsToString(mode == "nfc" ? nfc : nfd);
+			});
+		switch (args) {
+			case _.slice(0, 1) => ["putEnv"]:
+			// ["putEnv", var name, index, nfc mode, next args...]
+			// sets the variable, then runs the utility again with the remaining
+			// arguments and relays its stdout and exit code
+			Sys.putEnv(args[1], sequenceIndex(args[2], args[3]));
+			var out = runUtility(args.slice(4));
+			Sys.print(out.stdout);
+			Sys.exit(out.exitCode);
+			case ["getCwd"]: Sys.println(Sys.getCwd());
+			case ["getEnv", name]: Sys.println(Sys.getEnv(name));
+			case ["environment", name]: Sys.println(Sys.environment().get(name));
+			case ["exitCode", Std.parseInt(_) => code]: Sys.exit(code);
+			case ["args", data]: Sys.println(data);
+			case ["println", d, mode]: Sys.println(sequenceIndex(d, mode));
+			case ["print", d, mode]: Sys.print(sequenceIndex(d, mode));
+			case ["trace", d, mode]: trace(sequenceIndex(d, mode));
+			case ["stdin.readLine"]: Sys.println(Sys.stdin().readLine());
+			case ["stdin.readString", Std.parseInt(_) => len]: Sys.println(Sys.stdin().readString(len, UTF8));
+			case ["stdin.readUntil", Std.parseInt(_) => end]: Sys.println(Sys.stdin().readUntil(end));
+			case ["stderr.writeString", d, mode]:
+			var stream = Sys.stderr(); stream.writeString(sequenceIndex(d, mode)); stream.flush();
+			case ["stdout.writeString", d, mode]:
+			var stream = Sys.stdout(); stream.writeString(sequenceIndex(d, mode)); stream.flush();
+			case ["programPath"]: Sys.println(Sys.programPath());
+			case _: // no-op
+		}
+	}
+}