فهرست منبع

Updated to zsv 0.3.6.fdcd18e (#13)

* Updated to zsv 0.3.6.fdcd18e

* Updated to mxml 3.3.1.fd47c7d

I'm guessing the stream code was added in? Transferred the stream related code to the latest version of mxml. Seems to be functioning properly according to my test but may need more testing.

* Revert "Updated to mxml 3.3.1.fd47c7d"

This reverts commit cd0f585ffb33d6ba520388ae585ec3e2f16c2d7b.
Carl Husberg 2 سال پیش
والد
کامیت
b8cc1327de

+ 2 - 0
csv.mod/csv.bmx

@@ -31,6 +31,8 @@ ModuleInfo "License: MIT"
 ModuleInfo "zsv - Copyright (c) 2021 Guarnerix Inc dba Liquidaty"
 ModuleInfo "Copyright: 2022-2023 Bruce A Henderson"
 
+ModuleInfo "History: 1.03"
+ModuleInfo "History: Updated to zsv 0.3.6.fdcd18e"
 ModuleInfo "History: 1.02"
 ModuleInfo "History: Updated to zsv 0.3.5.fd7b11a"
 ModuleInfo "History: 1.01"

+ 5 - 3
csv.mod/zsv/README.md

@@ -19,11 +19,13 @@ lib only:
 
 zsv+lib is a fast CSV parser library and extensible command-line utility.
 It achieves high performance using SIMD operations,
-[efficient memory use](docs/memory.md) and other optimization techniques.
+[efficient memory use](docs/memory.md) and other optimization techniques, and
+can also parse generic-delimited and fixed-width formats, as well as multi-row-span headers
 
-The ZSV CLI can be compiled to virtually any target, including [web assembly](examples/js).
+The ZSV CLI can be compiled to virtually any target, including [web assembly](examples/js), and offers features including `select`, `count`, direct CSV `sql`, `flatten`, `serialize`, `2json` conversion, `2db` sqlite3 conversion, `stack`, `pretty`, `2tsv`, `compare` and more.
+
+Pre-built CLI packages are available via brew and nuget
 
-Pre-built CLI packages are available via brew and nuget.
 
 A pre-built library package is available for Node (`npm install zsv-lib`). Please note, this package
 is still in alpha and currently only exposes a small subset of the zsv library capabilities. More

+ 14 - 31
csv.mod/zsv/app/2db.c

@@ -72,9 +72,9 @@ struct zsv_2db_data {
   char *connection_string;
 
   struct {
-    yajl_handle handle;
+//    yajl_handle handle;
     struct yajl_helper_parse_state st;
-    yajl_callbacks callbacks;
+//    yajl_callbacks callbacks;
     yajl_status yajl_stat;
     enum zsv_2db_state state;
 
@@ -153,27 +153,11 @@ static void zsv_2db_delete(zsv_2db_handle data) {
 
   free(data->json_parser.row_values);
 
-
   yajl_helper_parse_state_free(&data->json_parser.st);
-  if(data->json_parser.handle)
-    yajl_free(data->json_parser.handle);
 
   free(data);
 }
 
-static int zsv_2db_json_parse_err(struct zsv_2db_data *data,
-                                  unsigned char *last_parsed_buff,
-                                  size_t last_parsed_buff_len
-                                  ) {
-  unsigned char *str = yajl_get_error(data->json_parser.handle, 1,
-                                      last_parsed_buff, last_parsed_buff_len);
-  if(str) {
-    fprintf(stderr, "Error parsing JSON: %s", (const char *)str);
-    yajl_free_error(data->json_parser.handle, str);
-  }
-  return 1;
-}
-
 /* sqlite3 helper functions */
 
 static int zsv_2db_sqlite3_exec_2db(sqlite3 *db, const char *sql) {
@@ -626,18 +610,17 @@ static zsv_2db_handle zsv_2db_new(struct zsv_2db_options *opts) {
       sqlite3_exec(data->db, "PRAGMA journal_mode = OFF", NULL, NULL, NULL);
 
       // parse the input and create & populate the database table
-      yajl_helper_parse_state_init(&data->json_parser.st, 32,
-                                   json_start_map, json_end_map, json_map_key,
-                                   json_start_array, json_end_array,
-                                   json_process_value,
-                                   data);
-      yajl_helper_callbacks_init(&data->json_parser.callbacks, 1);
-
-      data->json_parser.handle = yajl_alloc(&data->json_parser.callbacks, NULL,
-                                            &data->json_parser.st);
-      if(!data->json_parser.handle) {
+      if(yajl_helper_parse_state_init(&data->json_parser.st, 32,
+                                      json_start_map, json_end_map, json_map_key,
+                                      json_start_array, json_end_array,
+                                      json_process_value,
+                                      data) != yajl_status_ok) {
         fprintf(stderr, "Unable to get yajl parser\n");
         err = 1;
+      } else {
+//        yajl_helper_callbacks_init(&data->json_parser.callbacks, 32);
+//        data->json_parser.handle = st->yajl; // yajl_alloc(&data->json_parser.callbacks, NULL,
+        // &data->json_parser.st);
       }
     }
   }
@@ -683,7 +666,7 @@ static int zsv_2db_finish(zsv_2db_handle data) {
 
 // exportable
 static yajl_handle zsv_2db_yajl_handle(zsv_2db_handle data) {
-  return data->json_parser.handle;
+  return data->json_parser.st.yajl;
 }
 
 int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zsv_opts, const char *opts_used) {
@@ -770,12 +753,12 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zs
             break;
           yajl_status stat = yajl_parse(zsv_2db_yajl_handle(data), buff, bytes_read);
           if(stat != yajl_status_ok)
-            err = zsv_2db_json_parse_err(data, buff, bytes_read);
+            err = yajl_helper_print_err(data->json_parser.st.yajl, buff, bytes_read);
         }
 
         if(!err) {
           if(yajl_complete_parse(zsv_2db_yajl_handle(data)) != yajl_status_ok)
-             err = zsv_2db_json_parse_err(data, buff, bytes_read);
+            err = yajl_helper_print_err(data->json_parser.st.yajl, buff, bytes_read);
           else if(zsv_2db_err(data) || zsv_2db_finish(data))
             err = 1;
         }

+ 1 - 1
csv.mod/zsv/app/2json.c

@@ -220,7 +220,7 @@ static int zsv_db2json(const char *input_filename, char **tname, jsonwriter_hand
   if(!*tname)
     fprintf(stderr, "No table name provided, and none found in %s\n", input_filename), err = 1;
   else
-    err = zsv_dbtable2json(db, *tname, jsw);
+    err = zsv_dbtable2json(db, *tname, jsw, 0);
   return err;
 }
 

+ 19 - 11
csv.mod/zsv/app/Makefile

@@ -37,6 +37,13 @@ ifneq ($(ZSV_CACHE_PREFIX),)
   CFLAGS+= -DZSV_CACHE_PREFIX='${ZSV_CACHE_PREFIX}'
 endif
 
+ifneq ($(ZSV_IS_PROP_FILE_HANDLER),)
+	CFLAGS+=-DZSV_IS_PROP_FILE_HANDLER=${ZSV_IS_PROP_FILE_HANDLER}
+endif
+ifneq ($(ZSV_IS_PROP_FILE_DEPTH),)
+	CFLAGS+=-DZSV_IS_PROP_FILE_DEPTH=${ZSV_IS_PROP_FILE_DEPTH}
+endif
+
 DEBUG=0
 WIN=
 ifeq ($(WIN),)
@@ -163,8 +170,8 @@ endif
 
 ZSV=$(BINDIR)/zsv${EXE}
 
-SOURCES= echo count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db compare prop rm jq
-CLI_SOURCES=echo select desc count 2tsv pretty sql flatten 2json serialize stack 2db compare prop rm jq
+SOURCES= echo count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db compare prop rm mv jq
+CLI_SOURCES=echo select desc count 2tsv pretty sql flatten 2json serialize stack 2db compare prop rm mv jq
 
 CFLAGS+= -DUSE_JQ
 
@@ -240,8 +247,8 @@ SQLITE_SRC=${THIS_MAKEFILE_DIR}/external/sqlite3/sqlite3*.c
 SQLITE_EXT=${BUILD_DIR}-external/sqlite3/sqlite3_and_csv_vtab.o
 SQLITE_EXT_INCLUDE=-I${THIS_MAKEFILE_DIR}/external/sqlite3
 
-# everything uses prop, which in turn uses yajl and jq
-OBJECTS+= ${YAJL_OBJ} ${YAJL_HELPER_OBJ}
+# everything uses prop, which in turn uses yajl and jq and json
+OBJECTS+= ${YAJL_OBJ} ${YAJL_HELPER_OBJ} ${BUILD_DIR}/objs/utils/json.o
 MORE_SOURCE+= ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -I${JQ_INCLUDE_DIR}
 MORE_LIBS+=${JQ_LIB} ${LDFLAGS_JQ}
 
@@ -255,7 +262,7 @@ help:
 	@echo "which will build and test all apps, or to build/test a single app:"
 	@echo "  ${MAKE} test-xx"
 	@echo "where xx is any of:"
-	@echo "  echo count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db prop rm"
+	@echo "  echo count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db prop rm mv"
 	@echo ""
 
 install: ${ZSV}
@@ -368,10 +375,6 @@ ${JSONWRITER_OBJECT}: ${JSONWRITER_SRC}/jsonwriter.c
 # ${STANDALONE_PFX}flatten${EXE} ${STANDALONE_PFX}stack${EXE} ${STANDALONE_PFX}desc${EXE}:
 MORE_SOURCE+=-I${THIS_MAKEFILE_DIR}/external/sglib
 
-# prop uses utils/json
-${CLI} ${STANDALONE_PFX}prop${EXE}: ${BUILD_DIR}/objs/utils/json.o
-${CLI} ${STANDALONE_PFX}prop${EXE}: MORE_OBJECTS+= ${BUILD_DIR}/objs/utils/json.o
-
 # sql, 2db, 2json, echo, compare use sqlite3
 ${CLI} ${STANDALONE_PFX}sql${EXE} ${STANDALONE_PFX}2db${EXE} ${STANDALONE_PFX}2json${EXE} ${STANDALONE_PFX}echo${EXE} ${STANDALONE_PFX}compare${EXE}: ${SQLITE_EXT}
 ${CLI} ${STANDALONE_PFX}sql${EXE} ${STANDALONE_PFX}2db${EXE} ${STANDALONE_PFX}2json${EXE} ${STANDALONE_PFX}echo${EXE} ${STANDALONE_PFX}compare${EXE}: MORE_OBJECTS+=${SQLITE_EXT}
@@ -408,8 +411,13 @@ ${YAJL_HELPER_OBJ}: external/yajl_helper/yajl_helper.c
 	@mkdir -p `dirname "$@"`
 	${CC} ${CFLAGS} -I${BASEDIR}/yajl_helper ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -c $< -o $@
 
-test:
-	@${MAKE} -C test $@ QUIET=1 LEAKS=${LEAKS} CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG}
+test:	test-standalone test-cli
+
+test-standalone:
+	@${MAKE} -C test test QUIET=1 LEAKS=${LEAKS} CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG}
+
+test-cli:  ${CLI}
+	@${MAKE} -C test $@ QUIET=1 LEAKS=${LEAKS} CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} CLI=${CLI}
 
 clean-all: clean clean-external clean-obj clean-lib
 

+ 11 - 8
csv.mod/zsv/app/builtin/help.c

@@ -38,16 +38,18 @@ static int main_help(int argc, const char *argv[]) {
     "  -L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))",
 #endif
     "  -c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024",
-    "  -r,--max-row-size <n>: set the minimum supported maximum row size. defaults to 64k",
-    "  -B,--buff-size <n>: set internal buffer size. defaults to 256k",
-    "  -t,--tab-delim: set column delimiter to tab",
-    "  -O,--other-delim <char>: set column delimiter to specified character",
-    "  -q,--no-quote: turn off quote handling",
-    "  -R,--skip-head <n>: skip specified number of initial rows",
-    "  -d,--header-row-span <n>: apply header depth (rowspan) of n",
+    "  -r,--max-row-size <n>    : set the minimum supported maximum row size. defaults to 64k",
+    "  -B,--buff-size <n>       : set internal buffer size. defaults to 256k",
+    "  -t,--tab-delim           : set column delimiter to tab",
+    "  -O,--other-delim <char>  : set column delimiter to specified character",
+    "  -q,--no-quote            : turn off quote handling",
+    "  -R,--skip-head <n>       : skip specified number of initial rows",
+    "  -d,--header-row-span <n> : apply header depth (rowspan) of n",
     "  -u,--malformed-utf8-replacement <replacement_string>: replacement string (can be empty) in case of malformed UTF8 input",
     "       (default for \"desc\" commamnd is '?')",
-    "  -S,--keep-blank-headers: disable default behavior of ignoring leading blank rows",
+    "  -S,--keep-blank-headers  : disable default behavior of ignoring leading blank rows",
+    "  -0,--header-row <header> : insert the provided CSV as the first row (in position 0)",
+    "                             e.g. --header-row 'col1,col2,\"my col 3\"'",
     "  -v,--verbose: verbose output",
     "",
     "Commands that parse CSV or other tabular data:",
@@ -70,6 +72,7 @@ static int main_help(int argc, const char *argv[]) {
     "  prop     : save parsing options associated with a file that are subsequently",
     "             applied by default when processing that file",
     "  rm       : remove a file and its related cache",
+    "  mv       : rename (move) a file and/or its related cache",
 #ifdef USE_JQ
     "  jq       : run a jq filter on json input",
 #endif

+ 11 - 2
csv.mod/zsv/app/cli.c

@@ -69,6 +69,7 @@ ZSV_MAIN_DECL(compare);
 ZSV_MAIN_DECL(echo);
 ZSV_MAIN_NO_OPTIONS_DECL(prop);
 ZSV_MAIN_NO_OPTIONS_DECL(rm);
+ZSV_MAIN_NO_OPTIONS_DECL(mv);
 
 #ifdef USE_JQ
 ZSV_MAIN_NO_OPTIONS_DECL(jq);
@@ -99,7 +100,8 @@ struct builtin_cmd builtin_cmds[] = {
   CLI_BUILTIN_COMMAND(compare),
   CLI_BUILTIN_COMMAND(echo),
   CLI_BUILTIN_NO_OPTIONS_COMMAND(prop),
-  CLI_BUILTIN_NO_OPTIONS_COMMAND(rm)
+  CLI_BUILTIN_NO_OPTIONS_COMMAND(rm),
+  CLI_BUILTIN_NO_OPTIONS_COMMAND(mv)
 #ifdef USE_JQ
   , CLI_BUILTIN_NO_OPTIONS_COMMAND(jq)
 #endif
@@ -474,7 +476,14 @@ int ZSV_CLI_MAIN(int argc, const char *argv[]) {
           argv[2],
           "--help"
         };
-        return help_builtin->main(2, argv_tmp);
+        if(help_builtin->main)
+          return help_builtin->main(2, argv_tmp);
+        else if(help_builtin->cmd) {
+          char opts_used[ZSV_OPTS_SIZE_MAX] = { 0 };
+          struct zsv_opts opts = { 0 };
+          return help_builtin->cmd(2, argv_tmp, &opts, opts_used);
+        } else
+          return fprintf(stderr, "Unexpected syntax!\n");
       } else {
         const char *ext_cmd = extension_cmd_from_arg(argv[2]);
         if(ext_cmd) {

+ 112 - 0
csv.mod/zsv/app/mv.c

@@ -0,0 +1,112 @@
+/* Copyright (C) 2022 Guarnerix Inc dba Liquidaty - All Rights Reserved
+ * Unauthorized copying of this file, via any medium is strictly prohibited
+ * Proprietary and confidential
+ * Written by Matt Wong <[email protected]>
+ */
+
+/*
+ * Move a given file and its cache as follows:
+ * 1. check that the destination file doesn't exist. if it does, exit with an error
+ * 2. if a cache exists, check that the destination file cache dir doesn't exist. if it does, exit with an error
+ * 3. move the file. if it fails, exit with an error
+ * 4. move the cache, if it exists. if it fails, attempt to move the file back to its original location, and exit with an error
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h> // unlink()
+#include <errno.h>
+
+#define ZSV_COMMAND_NO_OPTIONS
+#define ZSV_COMMAND mv
+#include "zsv_command.h"
+
+#include <zsv/utils/dirs.h>
+#include <zsv/utils/file.h>
+#include <zsv/utils/cache.h>
+
+/* Usage text for the mv command: one array entry per output line, NULL-terminated */
+const char *zsv_mv_usage_msg[] = {
+  APPNAME ": move a file and its related cache",
+  "",
+  "Usage: " APPNAME " [options] <source> <destination>",
+  "  where options may be:",
+  "    -v,--verbose: verbose output",
+  "    -C,--cache  : only move related cache (not the file)",
+  NULL
+};
+
+/**
+ * Write the usage text to `target`, one line per entry of zsv_mv_usage_msg
+ * @param target stream to print to
+ * @return 0 if target is stdout, otherwise 1
+ */
+static int zsv_mv_usage(FILE *target) {
+  const char **line = zsv_mv_usage_msg;
+  while(*line) {
+    fprintf(target, "%s\n", *line);
+    line++;
+  }
+  if(target == stdout)
+    return 0;
+  return 1;
+}
+
+/**
+ * Entry point of the `mv` command: rename a file and/or its related cache
+ *
+ * Recognized arguments (argv[0] is skipped): -h,--help; -v,--verbose;
+ * -C,--cache (move only the cache, not the file); then <source> <destination>
+ *
+ * Steps: refuse if the destination file or destination cache dir already exists,
+ * rename the file, then rename the cache dir. If the cache rename fails after
+ * the file was renamed by this invocation, the file rename is reverted
+ *
+ * @param argc number of entries in argv
+ * @param argv command-line arguments
+ * @return 0 on success, otherwise non-zero (usage error or an errno value)
+ */
+int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int argc, const char *argv[]) {
+  int err = 0;
+  if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
+    err = zsv_mv_usage(stdout);
+  else if(argc < 2)
+    err = zsv_mv_usage(stderr);
+  else {
+    const char *source = NULL;
+    const char *dest = NULL;
+
+    char move_file = 1;
+    char verbose = 0;
+    for(int i = 1; !err && i < argc; i++) {
+      const char *arg = argv[i];
+      if(*arg == '-') {
+        if(!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
+          verbose = 1;
+        else if(!strcmp(arg, "-C") || !strcmp(arg, "--cache"))
+          move_file = 0;
+        else
+          err = zsv_printerr(1, "Unrecognized option: %s", arg);
+      } else if(!source)
+        source = arg;
+      else if(!dest)
+        dest = arg;
+      else
+        err = zsv_printerr(1, "Unrecognized option: %s", arg);
+    }
+
+    // both paths are required; check before they are used to derive cache paths
+    if(!err && (!source || !dest))
+      err = zsv_mv_usage(stderr);
+
+    if(!err) {
+      unsigned char *source_cache_dir = zsv_cache_path((const unsigned char *)source, NULL, 0);
+      unsigned char *dest_cache_dir = zsv_cache_path((const unsigned char *)dest, NULL, 0);
+      if(!source_cache_dir || !dest_cache_dir) {
+        err = errno = ENOMEM;
+        perror(NULL);
+      } else if(move_file && !zsv_file_exists(source)) {
+        err = errno = ENOENT;
+        perror(source);
+      } else if(move_file && zsv_file_exists(dest)) {
+        err = errno = EEXIST;
+        perror(dest);
+      } else if(zsv_dir_exists((const char *)source_cache_dir) && zsv_dir_exists((const char *)dest_cache_dir)) {
+        err = errno = EEXIST;
+        perror((char*)dest_cache_dir);
+        fprintf(stderr, "Use `mv --cache %s <destination>` to move or `rm --cache %s` to remove, then try again\n",
+                dest, dest);
+      } else {
+        char file_moved = 0;
+        if(move_file) {
+          if(verbose)
+            fprintf(stderr, "Renaming files\n");
+          if(rename(source, dest)) {
+            err = errno; // capture before any other library call can change errno
+            fprintf(stderr, "%s -> %s: %s\n", source, dest, strerror(err));
+          } else
+            file_moved = 1;
+        }
+
+        if(!err && zsv_dir_exists((const char *)source_cache_dir)) {
+          if(verbose)
+            fprintf(stderr, "Moving caches\n");
+          if(rename((char*)source_cache_dir, (char*)dest_cache_dir)) {
+            err = errno;
+            fprintf(stderr, "%s -> %s: %s\n", (const char *)source_cache_dir,
+                    (const char *)dest_cache_dir, strerror(err));
+
+            // revert the file rename, but only if this invocation performed it
+            // (with --cache the file was never moved and must not be touched)
+            if(file_moved && rename(dest, source)) {
+              int revert_err = errno;
+              fprintf(stderr, "%s -> %s: %s\n", dest, source, strerror(revert_err));
+            }
+          }
+        }
+      }
+      free(source_cache_dir);
+      free(dest_cache_dir);
+    }
+  }
+  return err;
+}

+ 19 - 1
csv.mod/zsv/app/pretty.c

@@ -293,7 +293,25 @@ void zsv_pretty_write_cell(unsigned char *buff, size_t bytes, struct zsv_pretty_
                                                                            &used_width, &utf8_err);
       }
       if(bytes_to_print) {
-        data->write(buff, 1, bytes_to_print, data->write_arg);
+        if((data->markdown || data->markdown_pad)
+           && (memchr(buff, '|', bytes_to_print)
+               || memchr(buff, '\\', bytes_to_print))
+           ) {
+          char *tmp = malloc(bytes_to_print*2);
+          if(!tmp)
+            data->parser_status = zsv_status_memory;
+          else {
+            size_t tmp_len = 0;
+            for(size_t i = 0; i < bytes_to_print; i++) {
+              if(memchr("|\\", buff[i], 2))
+                tmp[tmp_len++] = '\\';
+              tmp[tmp_len++] = buff[i];
+            }
+            data->write(tmp, 1, tmp_len, data->write_arg);
+            free(tmp);
+          }
+        } else
+          data->write(buff, 1, bytes_to_print, data->write_arg);
         data->line.printed += used_width;
 
         if(ellipsis) {

+ 444 - 38
csv.mod/zsv/app/prop.c

@@ -15,7 +15,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <errno.h>
-#include <yajl_helper.h>
+#include <unistd.h> // unlink, access
 
 #define ZSV_COMMAND_NO_OPTIONS
 #define ZSV_COMMAND prop
@@ -24,6 +24,8 @@
 #include <zsv/utils/os.h>
 #include <zsv/utils/file.h>
 #include <zsv/utils/json.h>
+#include <zsv/utils/jq.h>
+#include <zsv/utils/dirs.h>
 #include <zsv/utils/cache.h>
 #include <zsv/utils/string.h>
 
@@ -32,16 +34,26 @@ const char *zsv_property_usage_msg[] = {
   "          saved options will be applied by default when processing that file",
   "",
   "Usage: " APPNAME " <filepath> [options]",
-  "  where filepath is the path to the input CSV file (or when using --auto, - for stdin)",
+  "  where filepath is the path to the input CSV file, or",
+  "    when using --auto: input CSV file or - for stdin",
+  "    when using --clean: directory to clean from (use '.' for current directory)",
   "  and options may be one or more of:",
   "    -d,--header-row-span <value>: set/unset/auto-detect header depth (see below)",
   "    -R,--skip-head <value>      : set/unset/auto-detect initial rows to skip (see below)",
-  "    --clear                     : delete all properties",
+  "    --list-files                : list all property sets associted with the given file", // output a list of all cache files
+  "    --clear                     : delete all properties of the specified file",
+  // TO DO: --clear-file relative-path
+  "    --clean                     : delete all files / dirs in the property cache of the given directory",
+  "                                  that do not have a corresponding file in that directory",
+  "      --dry                     : dry run, outputs files/dirs to remove. only for use with --clean",
   "    --auto                      : guess the best property values. This is equivalent to:",
   "                                    -d auto -R auto",
   "                                  when using this option, a dash (-) can be used instead",
   "                                  of a filepath to read from stdin",
   "    --save [-f,--overwrite]     : (only applicable with --auto) save the detected result",
+  "    --copy <dest filepath>      : copy properties to another file", // to do: opt to check valid JSON
+  "    --export <output path>      : export all properties to a single JSON file (- for stdout)", // to do: opt to check valid JSON
+  "    --import <input path>       : import properties from a single JSON file (- for stdin)", // to do: opt to check valid JSON
   "    -f,--overwrite              : overwrite any previously-saved properties",
   "",
   "For --header-row-span or --skip-head options, <value> can be:",
@@ -68,6 +80,8 @@ static int zsv_property_usage(FILE *target) {
 
 static int show_all_properties(const unsigned char *filepath) {
   int err = 0;
+  // to do: show all files, not just prop file
+
   if(!zsv_file_readable((const char *)filepath, &err, NULL)) {
     perror((const char *)filepath);
     return err;
@@ -494,20 +508,370 @@ static int merge_and_save_properties(const unsigned char *filepath,
   return err;
 }
 
+/* Operating modes of the prop command; zsv_prop_mode_default (0) means no mode option was given */
+enum zsv_prop_mode {
+  zsv_prop_mode_default = 0,
+  zsv_prop_mode_list_files = 'l',
+  zsv_prop_mode_clean = 'K',
+  zsv_prop_mode_export = 'e',
+  zsv_prop_mode_import = 'i',
+  zsv_prop_mode_copy = 'c',
+  zsv_prop_mode_clear = 'r'
+};
+
+/**
+ * Map a command-line option to the mode it selects
+ * @param opt option text, e.g. "--export"
+ * @return the matching mode, or zsv_prop_mode_default if opt is not a mode option
+ */
+static enum zsv_prop_mode zsv_prop_get_mode(const char *opt) {
+  static const struct {
+    const char *name;
+    enum zsv_prop_mode mode;
+  } modes[] = {
+    { "--clean", zsv_prop_mode_clean },
+    { "--list-files", zsv_prop_mode_list_files },
+    { "--copy", zsv_prop_mode_copy },
+    { "--export", zsv_prop_mode_export },
+    { "--import", zsv_prop_mode_import },
+    { "--clear", zsv_prop_mode_clear }
+  };
+  for(size_t i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
+    if(!strcmp(opt, modes[i].name))
+      return modes[i].mode;
+  }
+  return zsv_prop_mode_default;
+}
+
+/* Options gathered from the prop command line */
+struct prop_opts {
+  int64_t d; // ZSV_PROP_ARG_AUTO, ZSV_PROP_ARG_REMOVE or > 0
+  int64_t R; // ZSV_PROP_ARG_AUTO, ZSV_PROP_ARG_REMOVE or > 0
+  unsigned char clear:1;
+  unsigned char save:1;
+  unsigned char overwrite:1;
+  unsigned char _:3;
+};
+
+/**
+ * Default prop action: optionally auto-detect header span / skip values, then
+ * merge the result with any saved properties via merge_and_save_properties()
+ * @param filepath file the properties relate to
+ * @param zsv_opts parser options handed to detect_properties()
+ * @param opts     requested values and flags (received by value, adjusted locally)
+ * @return 0 on success, else the error from detection or from merge/save
+ */
+static int zsv_prop_execute_default(const unsigned char *filepath, struct zsv_opts zsv_opts, struct prop_opts opts) {
+  struct zsv_file_properties fp = { 0 };
+  const int auto_d = (opts.d == ZSV_PROP_ARG_AUTO);
+  const int auto_R = (opts.R == ZSV_PROP_ARG_AUTO);
+
+  // a non-negative value or a removal request for either property turns on overwrite
+  if(opts.d >= 0 || opts.R >= 0 || opts.d == ZSV_PROP_ARG_REMOVE || opts.R == ZSV_PROP_ARG_REMOVE)
+    opts.overwrite = 1;
+
+  if(auto_d || auto_R) {
+    int detect_err = detect_properties(filepath, &fp, auto_d, auto_R, &zsv_opts);
+    if(detect_err)
+      return detect_err;
+  }
+
+  // replace each auto request with the detected value
+  if(auto_d)
+    opts.d = fp.header_span;
+  if(auto_R)
+    opts.R = fp.skip;
+
+  return merge_and_save_properties(filepath, opts.save, opts.overwrite, opts.d, opts.R);
+}
+
+/**
+ * Built-in prop-file filter
+ * @return 1 if the entry is named "props.json" and is at depth 1, else 0
+ */
+int zsv_is_prop_file(struct zsv_foreach_dirent_handle *h, size_t depth) {
+  if(depth != 1)
+    return 0;
+  return strcmp(h->entry, "props.json") == 0;
+}
+
+#ifdef ZSV_IS_PROP_FILE_HANDLER
+int ZSV_IS_PROP_FILE_HANDLER(struct zsv_foreach_dirent_handle *, size_t);
+#endif
+
+/**
+ * Get, and optionally first update, the filter used to recognize property files
+ *
+ * The filter lives in a function-local static. Its initial value is chosen at
+ * compile time: ZSV_IS_PROP_FILE_HANDLER / ZSV_IS_PROP_FILE_DEPTH if defined,
+ * otherwise zsv_is_prop_file with a max depth of 1
+ *
+ * @param custom_is_prop_file filter to install when `set` is non-zero; NULL
+ *                            restores zsv_is_prop_file with a max depth of 1
+ * @param max_depth           max depth stored with a non-NULL custom filter
+ * @param set                 non-zero to update the stored filter, zero to only read it
+ * @return pointer to the stored (static) filter
+ */
+struct zsv_dir_filter *
+zsv_prop_get_or_set_is_prop_file(
+                                 int (*custom_is_prop_file)(struct zsv_foreach_dirent_handle *, size_t),
+                                 int max_depth,
+                                 char set
+                                 ) {
+  static struct zsv_dir_filter ctx = {
+#ifndef ZSV_IS_PROP_FILE_HANDLER
+    .filter = zsv_is_prop_file,
+#else
+    .filter = ZSV_IS_PROP_FILE_HANDLER,
+#endif
+#ifndef ZSV_IS_PROP_FILE_DEPTH
+    .max_depth = 1
+#else
+    .max_depth = ZSV_IS_PROP_FILE_DEPTH
+#endif
+  };
+
+  if(set) {
+    if(custom_is_prop_file) {
+      ctx.filter = custom_is_prop_file;
+      ctx.max_depth = max_depth;
+    } else {
+      // reset: the stored depth must be reset too, not just the local parameter
+      ctx.filter = zsv_is_prop_file;
+      ctx.max_depth = 1;
+    }
+  }
+  return &ctx;
+}
+
+/**
+ * Directory-walk callback: print the name of every non-directory entry that
+ * the filter found in h->ctx accepts
+ * @return always 0
+ */
+static int zsv_prop_foreach_list(struct zsv_foreach_dirent_handle *h, size_t depth) {
+  if(h->is_dir)
+    return 0;
+
+  struct zsv_dir_filter *dir_filter = (struct zsv_dir_filter *)h->ctx;
+
+  // while the filter runs, h->ctx holds the filter's own context; it is restored below
+  h->ctx = dir_filter->ctx;
+  int accepted = dir_filter->filter(h, depth);
+  if(accepted)
+    printf("%s\n", h->entry);
+  h->ctx = dir_filter;
+  return 0;
+}
+
+/**
+ * Get, and optionally first replace, the handler kept in a function-local static
+ * @param custom_is_prop_dir handler to store when `set` is non-zero (may be NULL)
+ * @param set                non-zero to store custom_is_prop_dir before returning
+ * @return the stored handler; NULL until one has been set
+ */
+zsv_foreach_dirent_handler
+zsv_prop_get_or_set_is_prop_dir(
+                                int (*custom_is_prop_dir)(struct zsv_foreach_dirent_handle *, size_t),
+                                char set
+                                ) {
+  static int (*stored)(struct zsv_foreach_dirent_handle *, size_t) = NULL;
+  if(!set)
+    return stored;
+  stored = custom_is_prop_dir;
+  return stored;
+}
+
+/**
+ * Print the entries under the cache path of `filepath` that the current
+ * prop-file filter accepts
+ * @param filepath file whose cache path is walked
+ * @param verbose  passed through to zsv_foreach_dirent()
+ * @return always 0; nothing is listed if the cache path cannot be obtained
+ */
+static int zsv_prop_execute_list_files(const unsigned char *filepath, char verbose) {
+  unsigned char *cache_dir = zsv_cache_path(filepath, NULL, 0);
+  // work on a copy so the walk cannot modify the shared filter
+  struct zsv_dir_filter filter = *zsv_prop_get_or_set_is_prop_file(NULL, 0, 0);
+  if(!cache_dir)
+    return 0;
+
+  zsv_foreach_dirent((const char *)cache_dir, filter.max_depth, zsv_prop_foreach_list,
+                     &filter, verbose);
+  free(cache_dir);
+  return 0;
+}
+
+/* Context handed to zsv_prop_foreach_clean through the dirent handle */
+struct zsv_prop_foreach_clean_ctx {
+  const char *dirpath; // directory that the cache entries are checked against
+  unsigned char dry;   // non-zero: only print what would be removed
+};
+
+/**
+ * Directory-walk callback for `prop --clean`; acts only on entries at depth 1
+ *
+ * - directory: removed recursively (or reported, if dry) when no file of the
+ *   same name exists in ctx->dirpath; never recursed into
+ * - file: always removed (or reported, if dry)
+ *
+ * @return 0 on success, non-zero on allocation or removal failure
+ */
+static int zsv_prop_foreach_clean(struct zsv_foreach_dirent_handle *h, size_t depth) {
+  int err = 0;
+  if(depth == 1) {
+    struct zsv_prop_foreach_clean_ctx *ctx = h->ctx;
+    if(h->is_dir) {
+      // h->entry is the name of the top-level file that this folder relates to
+      // make sure that the top-level file exists
+      h->no_recurse = 1;
+
+      // asprintf leaves the pointer undefined on failure, so test its return value
+      char *cache_owner_path = NULL;
+      if(asprintf(&cache_owner_path, "%s%c%s", ctx->dirpath, FILESLASH, h->entry) < 0
+         || !cache_owner_path) {
+        fprintf(stderr, "Out of memory!\n");
+        return 1;
+      }
+      if(!zsv_file_exists(cache_owner_path)) {
+        if(ctx->dry)
+          printf("Orphaned: %s\n", h->parent_and_entry);
+        else
+          err = zsv_remove_dir_recursive((const unsigned char *)h->parent_and_entry);
+      }
+      free(cache_owner_path);
+    } else {
+      // there should be no files at depth 1, so just delete
+      if(ctx->dry)
+        printf("Unrecognized: %s\n", h->parent_and_entry);
+      else if(unlink(h->parent_and_entry)) {
+        perror(h->parent_and_entry);
+        err = 1;
+      }
+    }
+  }
+  return err;
+}
+
+/* Pass selector for zsv_prop_foreach_copy: validate first, then copy */
+enum zsv_prop_foreach_copy_mode {
+  zsv_prop_foreach_copy_mode_check = 1,
+  zsv_prop_foreach_copy_mode_copy
+};
+
+/* Context handed to zsv_prop_foreach_copy through the dirent handle */
+struct zsv_prop_foreach_copy_ctx {
+  struct zsv_dir_filter zsv_dir_filter; // decides which entries are property files
+  const unsigned char *src_cache_dir;
+  const unsigned char *dest_cache_dir;
+  enum zsv_prop_foreach_copy_mode mode;
+  int err;                              // error recorded by the callback, 0 if none
+  unsigned char output_started:1;       // set once the conflict list header was printed
+  unsigned char force:1;
+  unsigned char dry:1;
+  unsigned char _:5;
+};
+
+/**
+ * Directory-walk callback for `prop --copy`. For each non-directory entry that
+ * the filter accepts, the destination path is built by replacing the
+ * src_cache_dir prefix with dest_cache_dir, then:
+ * - check mode: record an error if the source is unreadable or, unless force
+ *   is set, if the destination exists; in dry mode print "src => dest"
+ * - copy mode: unless dry, copy to "<dest>.temp" and rename it to <dest>
+ *
+ * Errors are recorded in ctx->err rather than returned
+ * @return always 0
+ */
+static int zsv_prop_foreach_copy(struct zsv_foreach_dirent_handle *h, size_t depth) {
+  if(!h->is_dir) {
+    struct zsv_prop_foreach_copy_ctx *ctx = h->ctx;
+    // the filter sees its own context in h->ctx; ours is restored before returning
+    h->ctx = ctx->zsv_dir_filter.ctx;
+    if(ctx->zsv_dir_filter.filter(h, depth)) {
+      // asprintf leaves the pointer undefined on failure, so test its return value
+      char *dest_prop_filepath = NULL;
+      if(asprintf(&dest_prop_filepath, "%s%s", ctx->dest_cache_dir,
+                  h->parent_and_entry + strlen((const char *)ctx->src_cache_dir)) < 0
+         || !dest_prop_filepath) {
+        ctx->err = errno = ENOMEM;
+        perror(NULL);
+      } else {
+        switch(ctx->mode) {
+        case zsv_prop_foreach_copy_mode_check:
+          {
+            if(!zsv_file_readable(h->parent_and_entry, &ctx->err, NULL)) { // check if source is not readable
+              perror(h->parent_and_entry);
+            } else if(!ctx->force && access(dest_prop_filepath, F_OK) != -1) { // check if dest already exists
+              ctx->err = EEXIST;
+              if(!ctx->output_started) {
+                ctx->output_started = 1;
+                const char *msg = strerror(EEXIST);
+                fprintf(stderr, "%s:\n", msg ? msg : "File already exists");
+              }
+              fprintf(stderr, "  %s\n", dest_prop_filepath);
+            } else if(ctx->dry)
+              printf("%s => %s\n", h->parent_and_entry, dest_prop_filepath);
+          }
+          break;
+        case zsv_prop_foreach_copy_mode_copy:
+          if(!ctx->dry) {
+            char *dest_prop_filepath_tmp = NULL;
+            if(asprintf(&dest_prop_filepath_tmp, "%s.temp", dest_prop_filepath) < 0
+               || !dest_prop_filepath_tmp) {
+              ctx->err = errno = ENOMEM;
+              perror(NULL);
+            } else {
+              if(h->verbose)
+                fprintf(stderr, "Copying temp: %s => %s\n", h->parent_and_entry, dest_prop_filepath_tmp);
+              int err = zsv_copy_file(h->parent_and_entry, dest_prop_filepath_tmp);
+              if(err)
+                ctx->err = err;
+              else {
+                if(h->verbose)
+                  fprintf(stderr, "Renaming: %s => %s\n", dest_prop_filepath_tmp, dest_prop_filepath);
+                if(rename(dest_prop_filepath_tmp, dest_prop_filepath)) {
+                  // capture errno before fprintf has a chance to change it
+                  int rename_err = errno;
+                  const char *msg = strerror(rename_err);
+                  fprintf(stderr, "Unable to rename %s -> %s: %s\n", dest_prop_filepath_tmp, dest_prop_filepath, msg ? msg : "Unknown error");
+                  ctx->err = rename_err;
+                }
+              }
+              free(dest_prop_filepath_tmp);
+            }
+          }
+          break;
+        }
+        free(dest_prop_filepath);
+      }
+    }
+    h->ctx = ctx;
+  }
+  return 0;
+}
+
+/**
+ * Copy the property files in the cache of `src` to the cache of `dest`
+ *
+ * Runs two directory walks over the source cache: a check pass, then (unless
+ * the check pass recorded an error and force is not set) a copy pass
+ *
+ * @param src     file whose properties are copied
+ * @param dest    file that receives the properties
+ * @param force   skip the src/dest existence checks and ignore check-pass errors
+ * @param dry     report what would be copied instead of copying
+ * @param verbose passed through to the directory walk
+ * @return 0 on success, else an errno-style value, including any error
+ *         recorded by the per-file callback
+ */
+static int zsv_prop_execute_copy(const char *src, const char *dest, unsigned char force, unsigned char dry, unsigned char verbose) {
+  int err = 0;
+  unsigned char *src_cache_dir = zsv_cache_path((const unsigned char *)src, NULL, 0);
+  unsigned char *dest_cache_dir = zsv_cache_path((const unsigned char *)dest, NULL, 0);
+
+  if(!(src_cache_dir && dest_cache_dir))
+    err = errno = ENOMEM, perror(NULL);
+  else {
+    // if !force, only proceed if:
+    // - src exists (file)
+    // - dest exists (file)
+    // - dest file property cache d.n. have conflicts
+    struct zsv_prop_foreach_copy_ctx ctx = { 0 };
+    ctx.zsv_dir_filter = *zsv_prop_get_or_set_is_prop_file(NULL, 0, 0);
+    ctx.dest_cache_dir = dest_cache_dir;
+    ctx.src_cache_dir = src_cache_dir;
+    ctx.force = force;
+    ctx.dry = dry;
+
+    if(!force) {
+      if(!zsv_file_exists(src))
+        err = errno = ENOENT, perror(src);
+      if(!zsv_file_exists(dest))
+        err = errno = ENOENT, perror(dest);
+    }
+
+    if(!err) {
+      // for each property file, check if dest has same-named property file
+      ctx.mode = zsv_prop_foreach_copy_mode_check;
+      zsv_foreach_dirent((const char *)src_cache_dir, ctx.zsv_dir_filter.max_depth, zsv_prop_foreach_copy,
+                         &ctx, verbose);
+    }
+
+    if(!err && !(ctx.err && !force)) {
+      // copy the files
+      ctx.mode = zsv_prop_foreach_copy_mode_copy;
+      zsv_foreach_dirent((const char *)src_cache_dir, ctx.zsv_dir_filter.max_depth, zsv_prop_foreach_copy,
+                         &ctx, verbose);
+    }
+
+    // the callback reports conflicts and copy/rename failures only via ctx.err;
+    // without this they would be printed but the command would still return 0
+    if(!err)
+      err = ctx.err;
+  }
+  free(src_cache_dir);
+  free(dest_cache_dir);
+  return err;
+}
+
+/**
+ * Walk the property cache located under `dirpath` (ZSV_CACHE_DIR inside it)
+ * with zsv_prop_foreach_clean
+ *
+ * @param dirpath directory to clean from; "." uses ZSV_CACHE_DIR as a relative
+ *                path. Trailing slashes / backslashes are ignored when building
+ *                the cache path; a path consisting only of them is a no-op
+ * @param dry     report what would be removed instead of removing it
+ * @param verbose passed through to the directory walk
+ * @return 0, or 1 if the cache path could not be allocated
+ */
+static int zsv_prop_execute_clean(const char *dirpath, unsigned char dry, unsigned char verbose) {
+  // TO DO: if ZSV_CACHE_DIR-tmp exists, delete it (file or dir)
+  int err = 0;
+  size_t dirpath_len = strlen(dirpath);
+  while(dirpath_len && memchr("/\\", dirpath[dirpath_len-1], 2) != NULL)
+    dirpath_len--;
+  if(!dirpath_len)
+    return 0;
+
+  // TO DO: if NO_STDIN, require --force, else prompt user
+
+  // start from NULL: asprintf leaves the pointer undefined on failure
+  char *cache_parent = NULL;
+  if(!strcmp(dirpath, "."))
+    cache_parent = strdup(ZSV_CACHE_DIR);
+  else if(asprintf(&cache_parent, "%.*s%c%s", (int)dirpath_len, dirpath, FILESLASH, ZSV_CACHE_DIR) < 0)
+    cache_parent = NULL;
+  if(!cache_parent) {
+    fprintf(stderr, "Out of memory!\n");
+    return 1;
+  }
+
+  struct zsv_prop_foreach_clean_ctx ctx = { 0 };
+  ctx.dirpath = dirpath;
+  ctx.dry = dry;
+
+  zsv_foreach_dirent(cache_parent, 0, zsv_prop_foreach_clean, &ctx, verbose);
+  free(cache_parent);
+  return err;
+}
+
+/**
+ * Export the property files in the cache of `src` as JSON via zsv_dir_to_json()
+ * @param src     file whose properties are exported
+ * @param dest    output path, passed to zsv_dir_to_json()
+ * @param verbose passed through to zsv_dir_to_json()
+ * @return 0 on success, ENOMEM if the cache path is unavailable, else the
+ *         result of zsv_dir_to_json()
+ */
+static int zsv_prop_execute_export(const char *src, const char *dest, unsigned char verbose) {
+  unsigned char *cache_dir = zsv_cache_path((const unsigned char *)src, NULL, 0);
+  if(!cache_dir) {
+    errno = ENOMEM;
+    perror(NULL);
+    return ENOMEM;
+  }
+
+  // work on a copy of the shared filter
+  struct zsv_dir_filter filter = *zsv_prop_get_or_set_is_prop_file(NULL, 0, 0);
+  int result = zsv_dir_to_json(cache_dir, (const unsigned char *)dest, &filter, verbose);
+  free(cache_dir);
+  return result;
+}
+
+/**
+ * Import properties into the cache of `dest` from JSON via zsv_dir_from_json()
+ * @param dest    file that receives the properties; must exist unless force is set
+ * @param src     path of the JSON input; NULL reads from stdin
+ * @param force   skip the dest existence check; adds ZSV_DIR_FLAG_FORCE
+ * @param dry     adds ZSV_DIR_FLAG_DRY
+ * @param verbose passed through to zsv_dir_from_json()
+ * @return 0 on success, else an errno value or the result of zsv_dir_from_json()
+ */
+static int zsv_prop_execute_import(const char *dest, const char *src, unsigned char force,
+                                   unsigned char dry, unsigned char verbose) {
+  int err = 0;
+  int flags = 0;
+  unsigned char *target_dir = NULL;
+  FILE *fsrc = NULL;
+
+  if(!force && !zsv_file_exists(dest)) {
+    err = errno = ENOENT;
+    perror(dest);
+    goto done;
+  }
+
+  target_dir = zsv_cache_path((const unsigned char *)dest, NULL, 0);
+  if(!target_dir) {
+    err = errno = ENOMEM;
+    perror(NULL);
+    goto done;
+  }
+
+  if(src)
+    fsrc = fopen(src, "rb");
+  else
+    fsrc = stdin;
+  if(!fsrc) {
+    err = errno;
+    perror(src);
+    goto done;
+  }
+
+  if(force)
+    flags |= ZSV_DIR_FLAG_FORCE;
+  if(dry)
+    flags |= ZSV_DIR_FLAG_DRY;
+  err = zsv_dir_from_json(target_dir, fsrc, flags, verbose);
+
+done:
+  // stdin is not ours to close
+  if(fsrc && fsrc != stdin)
+    fclose(fsrc);
+  free(target_dir);
+  return err;
+}
+
+
+
 int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
   int err = 0;
+  char verbose = 0;
   if(m_argc < 2 ||
      (m_argc > 1 && (!strcmp(m_argv[1], "-h") || !strcmp(m_argv[1], "--help"))))
     err = zsv_property_usage(stdout);
   else {
-    struct prop_opts {
-      int64_t d; // ZSV_PROP_ARG_AUTO, ZSV_PROP_ARG_REMOVE or > 0
-      int64_t R; // ZSV_PROP_ARG_AUTO, ZSV_PROP_ARG_REMOVE or > 0
-      unsigned char clear:1;
-      unsigned char save:1;
-      unsigned char overwrite:1;
-      unsigned char _:3;
-    };
     struct prop_opts opts = { 0 };
     opts.d = ZSV_PROP_ARG_NONE;
     opts.R = ZSV_PROP_ARG_NONE;
@@ -516,18 +880,32 @@ int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
     if(m_argc == 2)
       return show_all_properties(filepath);
 
-    if(m_argc == 3 && !strcmp("--clear", m_argv[2]))
-      return zsv_cache_remove(filepath, zsv_cache_type_property);
-
+    enum zsv_prop_mode mode = zsv_prop_mode_default;
+    unsigned char dry = 0;
+    const char *mode_arg = NULL;   // e.g. "--export"
+    const char *mode_value = NULL; // e.g. "saved_export.json"
     for(int i = 2; !err && i < m_argc; i++) {
       const char *opt = m_argv[i];
-      if(!strcmp(opt, "-d") || !strcmp(opt, "--header-row-span"))
+      if(!strcmp(opt, "-v") || !strcmp(opt, "--verbose"))
+        verbose = 1;
+      else if(!strcmp(opt, "-d") || !strcmp(opt, "--header-row-span"))
         err = prop_arg_value(++i, m_argc, m_argv, &opts.d);
       else if(!strcmp(opt, "-R") || !strcmp(opt, "--skip-head"))
         err = prop_arg_value(++i, m_argc, m_argv, &opts.R);
-      else if(!strcmp(opt, "--clear"))
-        err = fprintf(stderr, "--clear cannot be used in conjunction with any other options\n");
-      else if(!strcmp(opt, "--auto")) {
+      else if(zsv_prop_get_mode(opt)) {
+        if(mode_arg)
+          err = fprintf(stderr, "Option %s cannot be used together with %s\n", opt, mode_arg);
+        else {
+          mode = zsv_prop_get_mode(opt);
+          mode_arg = opt;
+          if(mode == zsv_prop_mode_export || mode == zsv_prop_mode_import || mode == zsv_prop_mode_copy) {
+            if(++i < m_argc)
+              mode_value = m_argv[i];
+            else
+              err = fprintf(stderr, "Option %s requires a value\n", opt);
+          }
+        }
+      } else if(!strcmp(opt, "--auto")) {
         if(opts.d != ZSV_PROP_ARG_NONE && opts.R != ZSV_PROP_ARG_NONE)
           err = fprintf(stderr, "--auto specified, but all other properties also specified");
         else {
@@ -540,6 +918,8 @@ int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
         opts.save = 1;
       else if(!strcmp(opt, "-f") || !strcmp(opt, "--overwrite"))
         opts.overwrite = 1;
+      else if(!strcmp(opt, "--dry"))
+        dry = 1;
       else {
         fprintf(stderr, "Unrecognized option: %s\n", opt);
         err = 1;
@@ -547,6 +927,7 @@ int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
     }
 
     // check if option combination is invalid
+    // to do: check with zsv_prop_mode_clear
     if(!err) {
       char have_auto = opts.d == ZSV_PROP_ARG_AUTO || opts.R == ZSV_PROP_ARG_AUTO;
       char have_specified = opts.d >= 0 || opts.R >= 0;
@@ -555,7 +936,9 @@ int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
       if(have_auto && (have_specified || have_remove)) {
         fprintf(stderr, "Non-auto options may not be mixed with auto options\n");
         err = 1;
-      }
+      } else if((have_auto || have_specified || have_remove || opts.save)
+                && mode != zsv_prop_mode_default)
+        err = fprintf(stderr, "Invalid options in combination with %s\n", mode_arg);
 
       if(have_specified || have_remove) {
         opts.save = 1;
@@ -564,25 +947,48 @@ int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
     }
 
     if(!err) {
-      struct zsv_file_properties fp = { 0 };
-      if(opts.d >= 0 || opts.R >= 0 || opts.d == ZSV_PROP_ARG_REMOVE || opts.R == ZSV_PROP_ARG_REMOVE)
-        opts.overwrite = 1;
-      if(opts.d == ZSV_PROP_ARG_AUTO || opts.R == ZSV_PROP_ARG_AUTO) {
-        struct zsv_opts zsv_opts;
-        zsv_args_to_opts(m_argc, m_argv, &m_argc, m_argv, &zsv_opts, NULL);
-        err = detect_properties(filepath, &fp,
-                                opts.d == ZSV_PROP_ARG_AUTO,
-                                opts.R == ZSV_PROP_ARG_AUTO,
-                                &zsv_opts);
-      }
-
-      if(!err) {
-        if(opts.d == ZSV_PROP_ARG_AUTO)
-          opts.d = fp.header_span;
-
-        if(opts.R == ZSV_PROP_ARG_AUTO)
-          opts.R = fp.skip;
-        err = merge_and_save_properties(filepath, opts.save, opts.overwrite, opts.d, opts.R);
+      switch(mode) {
+      case zsv_prop_mode_clear:
+        if(!(filepath && *filepath))
+          err = fprintf(stderr, "--clear: please specify an input file\n");
+        else {
+          struct prop_opts opts2 = { 0 };
+          opts2.d = ZSV_PROP_ARG_NONE;
+          opts2.R = ZSV_PROP_ARG_NONE;
+          if(memcmp(&opts, &opts2, sizeof(opts))) // any difference from a pristine prop_opts means another option was given; NOTE(review): a byte-wise compare also covers padding / unused bit-field bits -- confirm those are always zero
+            err = fprintf(stderr, "--clear cannot be used in conjunction with any other options\n");
+          else {
+            unsigned char *cache_path = zsv_cache_path(filepath, NULL, 0);
+            if(!cache_path)
+              err = ENOMEM;
+            else if(zsv_dir_exists((const char *)cache_path))
+              err = zsv_remove_dir_recursive(cache_path);
+            free(cache_path);
+          }
+        }
+        break;
+      case zsv_prop_mode_list_files:
+        err = zsv_prop_execute_list_files(filepath, verbose);
+        break;
+      case zsv_prop_mode_clean:
+        err = zsv_prop_execute_clean((const char *)filepath, dry, verbose);
+        break;
+      case zsv_prop_mode_copy:
+        err = zsv_prop_execute_copy((const char *)filepath, mode_value, opts.overwrite, dry, verbose);
+        break;
+      case zsv_prop_mode_export:
+        err = zsv_prop_execute_export((const char *)filepath, mode_value && strcmp(mode_value, "-") ? mode_value : NULL, verbose);
+        break;
+      case zsv_prop_mode_import:
+        err = zsv_prop_execute_import((const char *)filepath, mode_value && strcmp(mode_value, "-") ? mode_value : NULL, opts.overwrite, dry, verbose);
+        break;
+      case zsv_prop_mode_default:
+        {
+          struct zsv_opts zsv_opts;
+          zsv_args_to_opts(m_argc, m_argv, &m_argc, m_argv, &zsv_opts, NULL);
+          err = zsv_prop_execute_default(filepath, zsv_opts, opts);
+        }
+        break;
       }
     }
   }

+ 4 - 5
csv.mod/zsv/app/rm.c

@@ -27,9 +27,9 @@
 const char *zsv_rm_usage_msg[] = {
   APPNAME ": remove a file and its related cache",
   "",
-  "Usage: " APPNAME " <filepath> <options>",
+  "Usage: " APPNAME " [options] <filepath>",
   "  where options may be:",
-  "    -v,--verbose: do not prompt for confirmation",
+  "    -v,--verbose: verbose output",
 #ifndef NO_STDIN
   "    -f,--force  : do not prompt for confirmation",
 #endif
@@ -107,11 +107,10 @@ int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int argc, const char *argv[]) {
             fprintf(stderr, "Removing %s", filepath);
           err = unlink(filepath);
           if(err) {
-            perror(filepath);
-            if(force)
+            if(errno == ENOENT && force) // unlink() returns -1 on failure and reports the reason in errno, so compare errno (not err) with ENOENT
               err = 0;
             else
-              fprintf(stderr, "Cached files (if any) not removed\n");
+              perror(filepath);
           }
         }
         if(!err) {

+ 0 - 8
csv.mod/zsv/app/select-pull.c

@@ -585,7 +585,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   const char *input_path = NULL;
   struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
   int col_index_arg_i = 0;
-  const char *insert_header_row = NULL;
   enum zsv_status stat = zsv_status_ok;
   for(int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
     if(!strcmp(argv[arg_i], "--")) {
@@ -655,12 +654,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       data.whitspace_clean_flags = 1;
     } else if(!strcmp(argv[arg_i], "-W") || !strcmp(argv[arg_i], "--no-trim")) {
       data.no_trim_whitespace = 1;
-    } else if(!strcmp(argv[arg_i], "--header-row")) {
-      arg_i++;
-      if(!(arg_i < argc))
-        stat = zsv_printerr(1, "%s option requires a header row value such as 'column_name1,\"column name 2\"'", argv[arg_i-1]);
-      else
-        insert_header_row = argv[arg_i];
     } else if(!strcmp(argv[arg_i], "--sample-every")) {
       arg_i++;
       if(!(arg_i < argc))
@@ -748,7 +741,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       stat = zsv_status_memory;
     else {
       zsv_parser parser;
-      data.opts->insert_header_row = insert_header_row;
       if(zsv_new_with_properties(data.opts, input_path, opts_used, &parser)
          == zsv_status_ok) {
         // all done with

+ 89 - 64
csv.mod/zsv/app/select.c

@@ -86,7 +86,7 @@ struct zsv_select_data {
 
   double sample_pct;
 
-  unsigned char sample_every_n;
+  unsigned sample_every_n;
 
   size_t data_rows_limit;
   size_t skip_data_rows;
@@ -529,14 +529,11 @@ const char *zsv_select_usage_msg[] = {
 #endif
   "  -H,--head <n>               : (head) only process the first n rows of data",
   "                                selected from all rows in the input",
-  "  --header-row <header row>   : insert the provided CSV as the first row",
-  "                                e.g. --header-row 'col1,col2,\"my col 3\"'",
   "  -s,--search <value>         : only output rows with at least one cell containing"
   "                                value",
   // to do: " -s,--search /<pattern>/modifiers: search on regex pattern; modifiers include 'g' (global) and 'i' (case-insensitive)",
   "  --sample-every <num of rows>: output a sample consisting of the first row, then every nth row",
   "  --sample-pct <percentage>   : output a randomly-selected sample (32 bits of randomness) of n percent of the input rows",
-  "  -d,--header-row-span <n>    : apply header depth (rowspan) of n",
   "  --distinct                  : skip subsequent occurrences of columns with the same name",
   "  --merge                     : merge subsequent occurrences of columns with the same",
   "                                name, outputting first non-null value",
@@ -601,68 +598,96 @@ static void zsv_select_cleanup(struct zsv_select_data *data) {
  * ----COLUMN1----COLUMN2-----COLUMN3----
  *
  * Approach:
- * - find each instance of white followed by not-white, but ignore the first instance of it
+ * - read the first [256k] of data [to do: alternatively, read only the first line]
+ * - merge all read lines into a single line where line[i] = 'x' for each position i at which
+ *   a non-white char appeared in any scanned line
+ * - from the merged line, find each instance of white followed by not-white,
+ *   but ignore the first instance of it
  */
-static enum zsv_status auto_detect_fixed_column_sizes(struct fixed *fixed, struct zsv_opts *opts, char **scanned, char verbose) {
+static enum zsv_status auto_detect_fixed_column_sizes(struct fixed *fixed, struct zsv_opts *opts,
+                                                      unsigned char *buff, size_t buffsize, size_t *buff_used,
+                                                      char verbose) {
+  char only_first_line = 0; // to do: make this an option
+  enum zsv_status stat = zsv_status_ok;
+
   fixed->count = 0;
-  unsigned buffsize = 1024*1024; // 1MB
-  char *buff = calloc(buffsize, sizeof(*buff));
-  if(!buff)
-    return zsv_status_memory;
+  char *line = calloc(buffsize, sizeof(*buff));
+  if(!line) {
+    stat = zsv_status_memory;
+    goto auto_detect_fixed_column_sizes_exit;
+  }
+  memset(line, ' ', buffsize);
 
-  int c;
-  size_t i;
-  char was_space = 1;
+  *buff_used = fread(buff, 1, buffsize, opts->stream);
+  if(!*buff_used) {
+    stat = zsv_status_no_more_input;
+    goto auto_detect_fixed_column_sizes_exit;
+  }
+
+  size_t line_end = 0;
+  size_t line_cursor = 0;
   char first = 1;
-  for(i = 0; i < buffsize-1; i++) {
-    c = fgetc(opts->stream);
-    if(c == EOF || c == '\n')
+  char was_space = 1;
+  char was_line_end = 0;
+  for(size_t i = 0; i < *buff_used && (!only_first_line || !line_end); i++, line_cursor = was_line_end ? 0 : line_cursor + 1) {
+    was_line_end = 0;
+    // to do: support multi-byte unicode chars?
+    switch(buff[i]) {
+    case '\n':
+    case '\r':
+      if(line_cursor > line_end)
+        line_end = line_cursor;
+      was_line_end = 1;
+      was_space = 1;
+      break;
+    case '\t':
+    case '\v':
+    case '\f':
+    case ' ':
+      was_space = 1;
       break;
-    buff[i] = c;
-    if(!isspace(c)) {
+    default:
+      line[line_cursor] = 'x';
       if(was_space) {
-        if(first)
-          first = 0;
-        else
-          fixed->count++;
+        if(!line_end) { // only count columns for the first line
+          if(first)
+            first = 0;
+          else
+            fixed->count++;
+        }
       }
       was_space = 0;
-    } else
-      was_space = 1;
+    }
   }
   if(!first)
     fixed->count++;
 
-  if(c != '\n' || !fixed->count) {
-    free(buff);
-    return zsv_status_error;
+  if(!line_end) {
+    stat = zsv_status_error;
+    goto auto_detect_fixed_column_sizes_exit;
   }
 
-  // free unused memory
-  char *buff_tmp = realloc(buff, i+1);
-  if(buff_tmp)
-    buff = buff_tmp;
-  *scanned = buff;
-  buffsize = i;
+  if(verbose)
+    fprintf(stderr, "Calculating %zu columns from line:\n%.*s\n", fixed->count, (int)line_end, line);
 
-  // set offset values
+  // allocate offsets
   free(fixed->offsets);
   fixed->offsets = malloc(fixed->count * sizeof(*fixed->offsets));
-  if(!fixed->offsets)
-    return zsv_status_memory;
+  if(!fixed->offsets) {
+    stat = zsv_status_memory;
+    goto auto_detect_fixed_column_sizes_exit;
+  }
 
+  // now we have our merged line, so calculate the sizes
   // do the loop again, but assign values this time
   int count = 0;
   was_space = 1;
   first = 1;
   if(verbose)
     fprintf(stderr, "Running --fixed ");
-  for(i = 0; i < buffsize; i++) {
-    c = buff[i];
-    if(c == EOF || c == '\0')
-      break;
-    buff[i] = c;
-    if(!isspace(c)) {
+  size_t i;
+  for(i = 0; i < line_end; i++) {
+    if(line[i] == 'x') {
       if(was_space) {
         if(first)
           first = 0;
@@ -683,7 +708,10 @@ static enum zsv_status auto_detect_fixed_column_sizes(struct fixed *fixed, struc
   }
   if(verbose)
     fprintf(stderr, "\n");
-  return zsv_status_ok;
+
+ auto_detect_fixed_column_sizes_exit:
+  free(line);
+  return stat;
 }
 
 
@@ -699,8 +727,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   const char *input_path = NULL;
   struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
   int col_index_arg_i = 0;
-  const char *insert_header_row = NULL;
-  char *fixed_auto_scanned_buff = NULL;
+  unsigned char *preview_buff = NULL;
+  size_t preview_buff_len = 0;
 
   enum zsv_status stat = zsv_status_ok;
   for(int arg_i = 1; stat == zsv_status_ok && arg_i < argc; arg_i++) {
@@ -727,7 +755,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
         for(const char *end = argv[arg_i]; ; end++) {
           if(*end == ',' || *end == '\0') {
             if(!sscanf(start, "%zu,", &data.fixed.offsets[count++])) {
-              stat = zsv_printerr(1, "Invalid offset: %s.*\n", end - start, start);
+              stat = zsv_printerr(1, "Invalid offset: %.*s\n", (int)(end - start), start); // the '*' precision must be an int; end - start is a ptrdiff_t
               break;
             } else if(*end == '\0')
               break;
@@ -773,12 +801,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       data.whitspace_clean_flags = 1;
     } else if(!strcmp(argv[arg_i], "-W") || !strcmp(argv[arg_i], "--no-trim")) {
       data.no_trim_whitespace = 1;
-    } else if(!strcmp(argv[arg_i], "--header-row")) {
-      arg_i++;
-      if(!(arg_i < argc))
-        stat = zsv_printerr(1, "%s option requires a header row value such as 'column_name1,\"column name 2\"'", argv[arg_i-1]);
-      else
-        insert_header_row = argv[arg_i];
     } else if(!strcmp(argv[arg_i], "--sample-every")) {
       arg_i++;
       if(!(arg_i < argc))
@@ -786,7 +808,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       else if(atoi(argv[arg_i]) <= 0)
         stat = zsv_printerr(1, "--sample-every value should be an integer > 0");
       else
-        data.sample_every_n = atoi(argv[arg_i]);
+        data.sample_every_n = atoi(argv[arg_i]); // TO DO: check for overflow
     } else if(!strcmp(argv[arg_i], "--sample-pct")) {
       arg_i++;
       double d;
@@ -853,12 +875,15 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   if(stat == zsv_status_ok && fixed_auto) {
     if(data.fixed.offsets)
       stat = zsv_printerr(zsv_status_error, "Please specify either --fixed-auto or --fixed, but not both");
-    else if(insert_header_row)
+    else if(data.opts->insert_header_row)
       stat = zsv_printerr(zsv_status_error, "--fixed-auto can not be specified together with --header-row");
     else {
-      stat = auto_detect_fixed_column_sizes(&data.fixed, data.opts, &fixed_auto_scanned_buff, opts->verbose);
-      if(fixed_auto_scanned_buff)
-        data.opts->insert_header_row = fixed_auto_scanned_buff;
+      size_t buffsize = 1024*256; // preview buffer size: auto-detection reads up to the first 256k of input into preview_buff
+      preview_buff = calloc(buffsize, sizeof(*preview_buff));
+      if(!preview_buff)
+        stat = zsv_printerr(zsv_status_memory, "Out of memory!");
+      else
+        stat = auto_detect_fixed_column_sizes(&data.fixed, data.opts, preview_buff, buffsize, &preview_buff_len, opts->verbose);
     }
   }
 
@@ -879,9 +904,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
     else {
       data.opts->row_handler = zsv_select_header_row;
       data.opts->ctx = &data;
-      if(!data.opts->insert_header_row)
-        data.opts->insert_header_row = insert_header_row;
-
       if(zsv_new_with_properties(data.opts, input_path, opts_used, &data.parser)
          == zsv_status_ok) {
         // all done with
@@ -903,16 +925,19 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
         // process the input data
         zsv_handle_ctrl_c_signal();
-        enum zsv_status status;
-        while(!zsv_signal_interrupted && !data.cancelled && (status = zsv_parse_more(data.parser)) == zsv_status_ok)
-          ;
+        enum zsv_status status = zsv_status_ok;
+        if(preview_buff && preview_buff_len)
+          status = zsv_parse_bytes(data.parser, preview_buff, preview_buff_len);
 
+        while(status == zsv_status_ok
+              && !zsv_signal_interrupted && !data.cancelled)
+          status = zsv_parse_more(data.parser);
         zsv_finish(data.parser);
         zsv_delete(data.parser);
       }
     }
   }
-  free(fixed_auto_scanned_buff);
+  free(preview_buff);
   zsv_select_cleanup(&data);
   if(writer_opts.stream && writer_opts.stream != stdout)
     fclose(writer_opts.stream);

+ 1 - 1
csv.mod/zsv/app/sql.c

@@ -307,7 +307,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       f = stdin;
 
     if(f == stdin) {
-      tmpfn = zsv_get_temp_filename("zsv_sql");
+      tmpfn = zsv_get_temp_filename("zsv_sql_XXXXXXXX");
       if(!tmpfn) {
         fprintf(stderr, "Unable to create temp file name\n");
       } else {

+ 59 - 9
csv.mod/zsv/app/test/Makefile

@@ -48,7 +48,7 @@ TEST_DATA_DIR=${THIS_LIB_BASE}/data
 SOURCES= echo count count-pull select select-pull sql 2json serialize flatten pretty desc stack 2db 2tsv jq compare
 TARGETS=$(addprefix ${BUILD_DIR}/bin/zsv_,$(addsuffix ${EXE},${SOURCES}))
 
-TESTS=test-blank-leading-rows $(addprefix test-,${SOURCES})
+TESTS=test-blank-leading-rows $(addprefix test-,${SOURCES}) test-rm test-mv
 
 COLOR_NONE=\033[0m
 COLOR_GREEN=\033[1;32m
@@ -126,6 +126,14 @@ worldcitiespop_mil.csv:
 
 test-count test-count-pull: test-% : test-1-% test-2-%
 
+test-cli: ${CLI} # checks the first line and the line count of the CLI's help output for select and count
+	@${TEST_INIT}
+	@[ "${CLI}" = "" ] && echo 1>&2 'test-cli: missing CLI env var' && exit 1 || exit 0
+	@$< help select 2>&1 > ${TMP_DIR}/$@.out
+	@[ "`head -1 ${TMP_DIR}/$@.out`" = "select: streaming CSV parser" ] && [ $$(( `cat ${TMP_DIR}/$@.out | wc -l` )) = "35" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@$< help count 2>&1 > ${TMP_DIR}/$@.out
+	@[ "`head -1 ${TMP_DIR}/$@.out`" = "Usage: count [options]" ] && [ $$(( `cat ${TMP_DIR}/$@.out | wc -l` )) = "5" ] && ${TEST_PASS} || ${TEST_FAIL}
+
 test-1-count test-1-count-pull: test-1-% : ${BUILD_DIR}/bin/zsv_%${EXE} worldcitiespop_mil.csv
 	@${TEST_INIT}
 	@cat worldcitiespop_mil.csv | ${PREFIX} $< ${REDIRECT} ${TMP_DIR}/[email protected]
@@ -136,7 +144,7 @@ test-2-count test-2-count-pull: ${BUILD_DIR}/bin/zsv_count${EXE} ${TEST_DATA_DIR
 	@for x in 5000 5002 5004 5006 5008 5010 5013 5015 5017 5019 5021 5101 5105 5111 5113 5115 5117 5119 5121 5123 5125 5127 5129 5131 5211 5213 5215 5217 5311 5313 5315 5317 5413 5431 5433 5455 6133 ; do $< -r $$x ${TEST_DATA_DIR}/test/buffsplit_quote.csv ; done > ${TMP_DIR}/[email protected]
 	@${CMP} ${TMP_DIR}/[email protected] expected/test-2-count.out && ${TEST_PASS} || ${TEST_FAIL}
 
-test-select test-select-pull: test-% : test-n-% test-6-% test-7-% test-8-% test-9-% test-10-% test-quotebuff-% test-fixed-1-% test-fixed-2-% test-merge-%
+test-select test-select-pull: test-% : test-n-% test-6-% test-7-% test-8-% test-9-% test-10-% test-quotebuff-% test-fixed-1-% test-fixed-2-% test-fixed-3-% test-fixed-4-% test-merge-%
 
 test-merge-select test-merge-select-pull: test-merge-% : ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
@@ -198,18 +206,47 @@ test-10-select: test-10-% : ${BUILD_DIR}/bin/zsv_%${EXE}
 test-10-select-pull:
 	@echo 'N/a (test-10-select-pull)'
 
-test-fixed-1-select: ${BUILD_DIR}/bin/zsv_select${EXE}
+test-11-select test-11-select-pull: test-11-% : ${BUILD_DIR}/bin/zsv_%${EXE} # --header-row: both variants are compared against expected/test-11-select.out
+	@${TEST_INIT}
+	@${PREFIX} (echo "A1,B1" | $< --header-row "column1,column2") > /tmp/$@.out
+	@cmp /tmp/$@.out expected/test-11-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-fixed-1-select test-fixed-1-select-pull: ${BUILD_DIR}/bin/zsv_select${EXE}
 	@${TEST_INIT}
 	@${PREFIX} $< ${TEST_DATA_DIR}/fixed.csv --fixed 3,7,12,18,20,21,22 ${REDIRECT} ${TMP_DIR}/[email protected]
-	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CMP} ${TMP_DIR}/[email protected] expected/test-fixed-1-select.out && ${TEST_PASS} || ${TEST_FAIL}
 
-test-fixed-2-select: ${BUILD_DIR}/bin/zsv_select${EXE}
+test-fixed-2-select test-fixed-2-select-pull: ${BUILD_DIR}/bin/zsv_select${EXE}
 	@${TEST_INIT}
 	@${PREFIX} $< ${TEST_DATA_DIR}/fixed-auto.txt --fixed-auto ${REDIRECT} ${TMP_DIR}/[email protected]
-	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CMP} ${TMP_DIR}/[email protected] expected/test-fixed-2-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-fixed-3-select test-fixed-3-select-pull: ${BUILD_DIR}/bin/zsv_select${EXE} # both targets run the zsv_select binary
+	@${TEST_INIT}
+	@${PREFIX} $< ${TEST_DATA_DIR}/fixed-auto2.txt --fixed-auto ${REDIRECT} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/test-fixed-3-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-fixed-4-select test-fixed-4-select-pull: ${BUILD_DIR}/bin/zsv_select${EXE} # both targets run the zsv_select binary
+	@${TEST_INIT}
+	@${PREFIX} $< ${TEST_DATA_DIR}/fixed-auto3.txt --fixed-auto ${REDIRECT} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/test-fixed-4-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-rm: ${BUILD_DIR}/bin/zsv_prop${EXE} ${BUILD_DIR}/bin/zsv_rm${EXE}
+	@${TEST_INIT}
+	@echo 'hi' > ${TMP_DIR}/[email protected]
+	@${PREFIX} ${BUILD_DIR}/bin/zsv_prop${EXE} ${TMP_DIR}/[email protected] -R 1 -d 2 ${REDIRECT} /dev/null
+	@find ${TMP_DIR}/.zsv/data/[email protected]/props.json -type f >/dev/null
+	@${PREFIX} ${BUILD_DIR}/bin/zsv_rm${EXE} ${TMP_DIR}/[email protected] -f ${REDIRECT} /dev/null
+	@find ${TMP_DIR}/.zsv/data/[email protected]/props.json -type f 2>/dev/null && ${TEST_FAIL} || ${TEST_PASS}
 
-test-fixed-1-select-pull test-fixed-2-select-pull:
-	@echo "Skipping $@"
+test-mv: ${BUILD_DIR}/bin/zsv_prop${EXE} ${BUILD_DIR}/bin/zsv_mv${EXE}
+	@${TEST_INIT}
+	@echo 'hi' > ${TMP_DIR}/[email protected]
+	@rm -rf ${TMP_DIR}/.zsv/data/[email protected] ${TMP_DIR}/.zsv/data/[email protected] ${TMP_DIR}/[email protected]
+	@${PREFIX} ${BUILD_DIR}/bin/zsv_prop${EXE} ${TMP_DIR}/[email protected] -R 1 -d 2 ${REDIRECT} /dev/null
+	@find ${TMP_DIR}/.zsv/data/[email protected]/props.json -type f >/dev/null
+	@${PREFIX} ${BUILD_DIR}/bin/zsv_mv${EXE} ${TMP_DIR}/[email protected] ${TMP_DIR}/[email protected] ${REDIRECT} /dev/null
+	@find ${TMP_DIR}/.zsv/data/[email protected]/props.json -type f >/dev/null && ${TEST_PASS} || ${TEST_FAIL}
 
 
 test-blank-leading-rows: test-blank-leading-rows-1 test-blank-leading-rows-2 test-blank-leading-rows-3 test-blank-leading-rows-4
@@ -246,12 +283,25 @@ test-stack2: ${BUILD_DIR}/bin/zsv_stack${EXE}
 	@${PREFIX} $< ${TEST_DATA_DIR}/stack2-[12].csv ${REDIRECT} ${TMP_DIR}/[email protected]
 	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
 
-test-sql test-flatten test-pretty : test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
+test-sql test-flatten : test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
 	@( ( ! [ -s "${TEST_DATA_DIR}/test/$*.csv" ] ) && echo "No test input for $*") || \
 	(${PREFIX} $< ${ARGS-$*} < ${TEST_DATA_DIR}/test/$*.csv ${REDIRECT1} ${TMP_DIR}/[email protected] && \
 	${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL})
 
+test-pretty: test-pretty-1 test-pretty-escape-chars
+
+test-pretty-1 : test-%-1: ${BUILD_DIR}/bin/zsv_%${EXE} # static pattern rule: the stem "pretty" selects both the binary and the input csv
+	@${TEST_INIT}
+	@( ( ! [ -s "${TEST_DATA_DIR}/test/$*.csv" ] ) && echo "No test input for $*") || \
+	(${PREFIX} $< ${ARGS-$*} < ${TEST_DATA_DIR}/test/$*.csv ${REDIRECT1} ${TMP_DIR}/$@.out && \
+	${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})
+
+test-pretty-escape-chars: ${BUILD_DIR}/bin/zsv_pretty${EXE} # runs zsv_pretty with -M on pretty-escape.csv and compares against expected/
+	@${TEST_INIT}
+	@(${PREFIX} $< ${ARGS-$*} < ${TEST_DATA_DIR}/test/pretty-escape.csv -M ${REDIRECT1} ${TMP_DIR}/$@.out && \
+	${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL})
+
 test-2tsv: test-2tsv-1 test-2tsv-2
 
 test-2tsv-1 test-2tsv-2: test-% : ${BUILD_DIR}/bin/zsv_2tsv${EXE}

+ 59 - 10
csv.mod/zsv/app/test/prop/Makefile

@@ -17,7 +17,7 @@ TMP_DIR=${THIS_MAKEFILE_DIR}/tmp
 
 TEST_PASS=echo "${COLOR_BLUE}$@: ${COLOR_GREEN}Passed${COLOR_NONE}"
 TEST_FAIL=(echo "${COLOR_BLUE}$@: ${COLOR_RED}Failed!${COLOR_NONE}" && exit 1)
-TEST_INIT=mkdir -p ${TMP_DIR} && rm -f ${TMP_DIR}/test* && echo "${COLOR_PINK}$@: ${COLOR_NONE}"
+TEST_INIT=mkdir -p ${TMP_DIR} && rm -rf ${TMP_DIR}/test* && echo "${COLOR_PINK}$@: ${COLOR_NONE}"
 
 LEAKS=
 ifneq ($(LEAKS),)
@@ -36,50 +36,99 @@ help:
 	@echo "# run all tests:"
 	@echo "    make test"
 
-test: test-1 test-2 test-3 test-4 test-5 test-6 test-7 test-8 clean
+ifeq ($(EXE),)
+  $(error EXE is not defined)
+endif
+
+test: test-1 test-2 test-3 test-4 test-5 test-6 test-7 test-8 test-copy test-clean test-export test-import clean
 
 test-1:
 	@${TEST_INIT}
+	@${PREFIX} ${EXE} dummy.csv -R 2 ${SUFFIX}
+	@touch .zsv/data/dummy.csv/hello.json
 	@${PREFIX} ${EXE} dummy.csv --clear ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv`" == "{}" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@ls .zsv/data/dummy.csv/hello.json 2>/dev/null && ${TEST_FAIL} || ${TEST_PASS}
+	@${CHECK} [ "`${EXE} dummy.csv`" = "{}" ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-2:
 	@${TEST_INIT}
 	@${PREFIX} ${EXE} dummy.csv -R 2 ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" == '{"skip-head":2}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" = '{"skip-head":2}' ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-3:
 	@${TEST_INIT}
 	@${PREFIX} ${EXE} dummy.csv -d 3 ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" == '{"header-row-span":3,"skip-head":2}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" = '{"header-row-span":3,"skip-head":2}' ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-4:
 	@${TEST_INIT}
 	@${PREFIX} ${EXE} dummy.csv -R - ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" == '{"header-row-span":3}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" = '{"header-row-span":3}' ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-5:
 	@${TEST_INIT}
 	@${PREFIX} ${EXE} dummy.csv -d - ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" == '{}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" = '{}' ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-6:
 	@${TEST_INIT}
 	@${PREFIX} ${EXE} dummy.csv -d 3 ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" == '{"header-row-span":3}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} dummy.csv|jq -c -S`" = '{"header-row-span":3}' ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-7:
 	@${TEST_INIT}
 	@${PREFIX} ${EXE} dummy.csv --clear ${SUFFIX}
-	@${CHECK} [ "`${EXE} dummy.csv`" == "{}" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} dummy.csv`" = "{}" ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-8:
 	@${TEST_INIT}
 	@${CHECK} ${EXE} detect.csv --clear
 	@${PREFIX} ${EXE} detect.csv --auto --save ${SUFFIX}
-	@${CHECK} [ "`${EXE} detect.csv|jq -c -S`" == '{"header-row-span":2,"skip-head":2}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} detect.csv --list-files`" = "props.json" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@${CHECK} [ "`${EXE} detect.csv|jq -c -S`" = '{"header-row-span":2,"skip-head":2}' ] && ${TEST_PASS} || ${TEST_FAIL}
 	@${EXE} detect.csv --clear
 
+test-copy:
+	@${TEST_INIT}
+	@rm -rf ${TMP_DIR}/$@
+	@mkdir -p ${TMP_DIR}/$@/.zsv/data/abc.csv
+	@echo '{}' > ${TMP_DIR}/$@/.zsv/data/abc.csv/props.json
+	@touch ${TMP_DIR}/$@/abc.csv
+	@[ "`${EXE} ${TMP_DIR}/$@/abc.csv --copy ${TMP_DIR}/$@/def.csv 2>&1`" = "${TMP_DIR}/$@/def.csv: No such file or directory" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@touch ${TMP_DIR}/$@/def.csv
+	@${EXE} ${TMP_DIR}/$@/abc.csv --copy ${TMP_DIR}/$@/def.csv
+	@cmp ${TMP_DIR}/$@/.zsv/data/abc.csv/props.json ${TMP_DIR}/$@/.zsv/data/def.csv/props.json && ${TEST_PASS} || ${TEST_FAIL}
+
+test-clean:
+	@${TEST_INIT}
+	@rm -rf ${TMP_DIR}/$@
+	@mkdir -p ${TMP_DIR}/$@/.zsv/data/abc.csv
+	@echo '{}' > ${TMP_DIR}/$@/.zsv/data/abc.csv/props.json
+	@mkdir -p ${TMP_DIR}/$@/.zsv/data/def.csv
+	@echo '{}' > ${TMP_DIR}/$@/.zsv/data/def.csv/props.json
+	@touch ${TMP_DIR}/$@/abc.csv
+	@${EXE} ${TMP_DIR}/$@ --clean
+	@ [ ! -d ${TMP_DIR}/$@/.zsv/data/def.csv ] && [ -d ${TMP_DIR}/$@/.zsv/data/abc.csv ] && ${TEST_PASS} || ${TEST_FAIL}
+
+test-export:
+	@${TEST_INIT}
+	@rm -rf ${TMP_DIR}/$@
+	@mkdir -p ${TMP_DIR}/$@/.zsv/data/abc.csv
+	@echo '{"a":1}' > ${TMP_DIR}/$@/.zsv/data/abc.csv/props.json
+	@touch ${TMP_DIR}/$@/abc.csv
+	@ [ "`${EXE} ${TMP_DIR}/$@/abc.csv --export - | jq -c`" = '{"props.json":{"a":1}}' ] && ${TEST_PASS} || ${TEST_FAIL}
+
+test-import:
+	@${TEST_INIT}
+	@rm -rf ${TMP_DIR}/$@
+	@mkdir -p ${TMP_DIR}/$@/.zsv/data/abc.csv
+	@[ "`echo '{"props.json":{"b":1}}' | ${EXE} ${TMP_DIR}/$@/def.csv --import - 2>&1`" = "${TMP_DIR}/$@/def.csv: No such file or directory" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@echo '{"props.json":{"b":1}}' | ${EXE} ${TMP_DIR}/$@/def.csv --import - -f
+	@[ "`jq -c < ${TMP_DIR}/$@/.zsv/data/def.csv/props.json`" = '{"b":1}' ] && ${TEST_PASS} || ${TEST_FAIL}
+	@touch ${TMP_DIR}/$@/def.csv
+	@[ "`echo '{"props.json":{"b":1}}' | ${EXE} ${TMP_DIR}/$@/def.csv --import - 2>&1`" = "${TMP_DIR}/$@/.zsv/data/def.csv/props.json: File exists" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@[ "`echo '{"props.json":{"b":1}}' | ${EXE} ${TMP_DIR}/$@/def.csv --import - -f --dry`" = "${TMP_DIR}/$@/.zsv/data/def.csv/props.json" ] && ${TEST_PASS} || ${TEST_FAIL}
+
 clean:
 	@rm -rf ${TMP_DIR}
 

+ 79 - 70
csv.mod/zsv/app/utils/arg.c

@@ -115,10 +115,12 @@ void zsv_set_default_completed_callback(zsv_completed_callback cb, void *ctx) {
  *     -O,--other-delim <C>
  *     -q,--no-quote
  *     -R,--skip-head <n>: skip specified number of initial rows
- *     -d,--header-row-span <n>: apply header depth (rowspan) of n
+ *     -d,--header-row-span <n> : apply header depth (rowspan) of n
  *     -u,--malformed-utf8-replacement <replacement_string>: replacement string (can be empty) in case of malformed UTF8 input
  *       (default for "desc" command is '?')
- *     -S,--keep-blank-headers: disable default behavior of ignoring leading blank rows
+ *     -S,--keep-blank-headers  : disable default behavior of ignoring leading blank rows
+ *     -0,--header-row <header> : insert the provided CSV as the first row (in position 0)
+ *                                e.g. --header-row 'col1,col2,"my col 3"'
  *     -v,--verbose
  *
  * @param  argc      count of args to process
@@ -141,24 +143,13 @@ enum zsv_status zsv_args_to_opts(int argc, const char *argv[],
                                  char *opts_used
                                  ) {
 #ifdef ZSV_EXTRAS
-  static const char *short_args = "BcrtOqvRdSuL";
+  static const char *short_args = "BcrtOqvRdSu0L";
 #else
-  static const char *short_args = "BcrtOqvRdSu";
+  static const char *short_args = "BcrtOqvRdSu0";
 #endif
   assert(strlen(short_args) < ZSV_OPTS_SIZE_MAX);
 
-  *opts_out = zsv_get_default_opts();
-  int options_start = 1; // skip this many args before we start looking for options
-  int err = 0;
-  int new_argc = 0;
-  for(; new_argc < options_start && new_argc < argc; new_argc++)
-    argv_out[new_argc] = argv[new_argc];
-  if(opts_used) {
-    memset(opts_used, ' ', ZSV_OPTS_SIZE_MAX-1);
-    opts_used[ZSV_OPTS_SIZE_MAX-1] = '\0';
-  }
-
-  static const char *long_args[] = {
+  static const char *long_args[] = { //
     "buff-size",
     "max-column-count",
     "max-row-size",
@@ -170,12 +161,24 @@ enum zsv_status zsv_args_to_opts(int argc, const char *argv[],
     "header-row-span",
     "keep-blank-headers",
     "malformed-utf8-replacement",
+    "header-row",
 #ifdef ZSV_EXTRAS
     "limit-rows",
 #endif
     NULL
   };
 
+  *opts_out = zsv_get_default_opts();
+  int options_start = 1; // skip this many args before we start looking for options
+  int err = 0;
+  int new_argc = 0;
+  for(; new_argc < options_start && new_argc < argc; new_argc++)
+    argv_out[new_argc] = argv[new_argc];
+  if(opts_used) {
+    memset(opts_used, ' ', ZSV_OPTS_SIZE_MAX-1);
+    opts_used[ZSV_OPTS_SIZE_MAX-1] = '\0';
+  }
+
   for(int i = options_start; !err && i < argc; i++) {
     char arg = 0;
     if(*argv[i] != '-') { /* pass this option through */
@@ -218,67 +221,73 @@ enum zsv_status zsv_args_to_opts(int argc, const char *argv[],
     case 'R':
     case 'd':
     case 'u':
+    case '0':
       if(++i >= argc)
         err = fprintf(stderr, "Error: option %s requires a value\n", argv[i-1]);
-      else if(arg == 'O') {
+      else {
         const char *val = argv[i];
-        if(strlen(val) != 1 || *val == 0)
-          err = fprintf(stderr, "Error: delimiter '%s' may only be a single ascii character", val);
-        else if(strchr("\n\r\"", *val))
-          err = fprintf(stderr, "Error: column delimiter may not be '\\n', '\\r' or '\"'\n");
+        if(arg == 'O') {
+          if(strlen(val) != 1 || *val == 0)
+            err = fprintf(stderr, "Error: delimiter '%s' may only be a single ascii character", val);
+          else if(strchr("\n\r\"", *val))
+            err = fprintf(stderr, "Error: column delimiter may not be '\\n', '\\r' or '\"'\n");
         else
           opts_out->delimiter = *val;
-      } else if(arg == 'u') {
-        const char *val = argv[i];
-        if(!strcmp(val, "none"))
-          opts_out->malformed_utf8_replace = ZSV_MALFORMED_UTF8_DO_NOT_REPLACE;
-        else if(!*val)
-          opts_out->malformed_utf8_replace = ZSV_MALFORMED_UTF8_REMOVE;
-        else if(strlen(val) > 2 || *val < 0)
-          err = fprintf(stderr, "Error: %s value must be a single-byte UTF8 char, empty string or 'none'\n", argv[i-1]);
-        else
-          opts_out->malformed_utf8_replace = *val;
-      } else {
-        const char *val = argv[i];
-        /* arg = 'B', 'c', 'r', 'R', 'd', or 'L' (ZSV_EXTRAS only) */
-        long n = atol(val);
-        if(n < 0)
-          err = fprintf(stderr, "Error: option %s value may not be less than zero (got %li\n", val, n);
-#ifdef ZSV_EXTRAS
-        else if(arg == 'L') {
-          if(n < 1)
-            err = fprintf(stderr, "Error: max rows may not be less than 1 (got %s)\n", val);
-          else
-            opts_out->max_rows = n;
-        } else
-#endif
-        if(arg == 'B') {
-          if(n < ZSV_MIN_SCANNER_BUFFSIZE)
-            err = fprintf(stderr, "Error: buff size may not be less than %u (got %s)\n",
-                          ZSV_MIN_SCANNER_BUFFSIZE, val);
-          else
-            opts_out->buffsize = n;
-        } else if(arg == 'c') {
-          if(n < 8)
-            err = fprintf(stderr, "Error: max column count may not be less than 8 (got %s)\n", val);
+        } else if(arg == 'u') {
+          if(!strcmp(val, "none"))
+            opts_out->malformed_utf8_replace = ZSV_MALFORMED_UTF8_DO_NOT_REPLACE;
+          else if(!*val)
+            opts_out->malformed_utf8_replace = ZSV_MALFORMED_UTF8_REMOVE;
+          else if(strlen(val) > 2 || *val < 0)
+            err = fprintf(stderr, "Error: %s value must be a single-byte UTF8 char, empty string or 'none'\n", argv[i-1]);
           else
-            opts_out->max_columns = n;
-        } else if(arg == 'r') {
-          if(n < ZSV_ROW_MAX_SIZE_MIN)
-            err = fprintf(stderr, "Error: max row size size may not be less than %u (got %s)\n",
-                          ZSV_ROW_MAX_SIZE_MIN, val);
+            opts_out->malformed_utf8_replace = *val;
+        } else if(arg == '0') {
+          if(*val == 0)
+            err = fprintf(stderr, "Invalid empty Inserted header row\n");
           else
-            opts_out->max_row_size = n;
-        } else if(arg == 'd') {
-          if(n < 8 && n >= 0)
-            opts_out->header_span = n;
-          else
-            err = fprintf(stderr, "Error: header_span must be an integer between 0 and 8\n");
-        } else if(arg == 'R') {
-          if(n >= 0)
-            opts_out->rows_to_ignore = n;
-          else
-            err = fprintf(stderr, "Error: rows_to_skip must be >= 0\n");
+            opts_out->insert_header_row = argv[i];
+        } else {
+          /* arg = 'B', 'c', 'r', 'R', 'd', or 'L' (ZSV_EXTRAS only) */
+          long n = atol(val);
+          if(n < 0)
+            err = fprintf(stderr, "Error: option %s value may not be less than zero (got %li\n", val, n);
+#ifdef ZSV_EXTRAS
+          else if(arg == 'L') {
+            if(n < 1)
+              err = fprintf(stderr, "Error: max rows may not be less than 1 (got %s)\n", val);
+            else
+              opts_out->max_rows = n;
+          } else
+#endif
+            if(arg == 'B') {
+              if(n < ZSV_MIN_SCANNER_BUFFSIZE)
+                err = fprintf(stderr, "Error: buff size may not be less than %u (got %s)\n",
+                              ZSV_MIN_SCANNER_BUFFSIZE, val);
+              else
+                opts_out->buffsize = n;
+            } else if(arg == 'c') {
+              if(n < 8)
+                err = fprintf(stderr, "Error: max column count may not be less than 8 (got %s)\n", val);
+              else
+                opts_out->max_columns = n;
+            } else if(arg == 'r') {
+              if(n < ZSV_ROW_MAX_SIZE_MIN)
+                err = fprintf(stderr, "Error: max row size size may not be less than %u (got %s)\n",
+                              ZSV_ROW_MAX_SIZE_MIN, val);
+              else
+                opts_out->max_row_size = n;
+            } else if(arg == 'd') {
+              if(n < 8 && n >= 0)
+                opts_out->header_span = n;
+              else
+                err = fprintf(stderr, "Error: header_span must be an integer between 0 and 8\n");
+            } else if(arg == 'R') {
+              if(n >= 0)
+                opts_out->rows_to_ignore = n;
+              else
+                err = fprintf(stderr, "Error: rows_to_skip must be >= 0\n");
+            }
         }
       }
       break;

+ 4 - 2
csv.mod/zsv/app/utils/db.c

@@ -19,7 +19,7 @@ static char starts_w_str_underscore(const unsigned char *s, size_t s_len,
   return result;
 }
 
-int zsv_dbtable2json(sqlite3 *db, const char *tname, jsonwriter_handle jsw) {
+int zsv_dbtable2json(sqlite3 *db, const char *tname, jsonwriter_handle jsw, size_t limit) {
   int err = 0;
   const char *index_sql = "select name, sql from sqlite_master where type = 'index' and tbl_name = :tbl_name";
   const char *unique_sql = "select 1 from PRAGMA_index_list(?) where name = ? and [unique] <> 0";
@@ -116,6 +116,7 @@ int zsv_dbtable2json(sqlite3 *db, const char *tname, jsonwriter_handle jsw) {
       // ------ data: array of rows
       jsonwriter_start_array(jsw);
       // for each row
+      size_t count = 0;
       while(sqlite3_step(data_stmt) == SQLITE_ROW) {
         jsonwriter_start_array(jsw); // start row
         for(int i = 0; i < colcount; i++) {
@@ -127,7 +128,8 @@ int zsv_dbtable2json(sqlite3 *db, const char *tname, jsonwriter_handle jsw) {
             jsonwriter_null(jsw);
         }
         jsonwriter_end_array(jsw); // end row
-        //        sqlite3_reset(data_stmt);
+        if(limit && ++count >= limit)
+          break;
       }
       jsonwriter_end_array(jsw);
 

+ 82 - 44
csv.mod/zsv/app/utils/dirs.c

@@ -11,8 +11,12 @@
 #include <stdio.h>
 #include <string.h>
 #include <dirent.h>
+#include <errno.h>
 #include <zsv/utils/os.h>
+#include <zsv/utils/json.h>
+#include <zsv/utils/jq.h>
 #include <zsv/utils/dirs.h>
+#include <zsv/utils/string.h>
 #include <unistd.h> // unlink
 #include <sys/stat.h>
 
@@ -181,11 +185,6 @@ to do: add support for this OS!;
 
 #endif /* end of: #if defined(_WIN32) */
 
-struct remove_dir_ctx {
-  int err;
-  struct dir_path *dirs;
-};
-
 struct dir_path {
   struct dir_path *next;
   char *path;
@@ -210,70 +209,109 @@ static int rmdir_w_msg(const char *path, int *err) {
   return *err;
 }
 
-static void remove_files_collect_dirs(struct remove_dir_ctx *ctx, const char *path) {
-  // delete all files, collect dir names in reverse order
+/**
+ * zsv_foreach_dirent() handler used to remove a directory tree:
+ * - files are unlinked immediately
+ * - directories are pushed onto the front of the `struct dir_path` list that
+ *   `h->ctx` points to, so that the caller can remove them afterwards,
+ *   most recently visited first
+ *
+ * @param h     entry being visited; `h->ctx` must be a `struct dir_path **`
+ * @param depth unused
+ * @return 0 on success, 1 on error (a message is printed to stderr)
+ */
+static int zsv_foreach_dirent_remove(struct zsv_foreach_dirent_handle *h, size_t depth) {
+  (void)(depth);
+  if(!h->parent_and_entry) // nothing to act on (and nothing to allocate)
+    return 0;
+
+  if(!h->is_dir) { // file
+    if(unlink(h->parent_and_entry)) {
+      perror(h->parent_and_entry); // "Unable to remove file");
+      return 1;
+    }
+    return 0;
+  }
+
+  // dir: remember it for later removal
+  struct dir_path *dn = calloc(1, sizeof(*dn));
+  char *path_copy = strdup(h->parent_and_entry);
+  if(!dn || !path_copy) {
+    free(dn);
+    free(path_copy);
+    fprintf(stderr, "Out of memory!\n");
+    return 1;
+  }
+  dn->path = path_copy;
+  dn->next = *((struct dir_path **)h->ctx);
+  *((struct dir_path **)h->ctx) = dn;
+  return 0;
+}
+
+/**
+ * Visit every entry of `dir_path` (other than "." and ".."), calling `handler`
+ * for each one and then descending into each sub-directory unless the handler
+ * set `no_recurse` on the handle it was given
+ *
+ * @param dir_path  directory to read
+ * @param depth     depth of `dir_path`; handlers are called with depth + 1
+ * @param max_depth if > 0, directories deeper than this are not read
+ * @param handler   callback invoked for each entry (may be NULL)
+ * @param ctx       opaque pointer passed to the handler via `h->ctx`
+ * @param verbose   passed to the handler via `h->verbose`
+ * @return 0 on success, else the first non-zero error encountered
+ */
+static
+int zsv_foreach_dirent_aux(const char *dir_path,
+                           size_t depth,
+                           size_t max_depth,
+                           zsv_foreach_dirent_handler handler, void *ctx,
+                           char verbose
+                           ) {
+  int err = 0;
+  if(!dir_path)
+    return 1;
+
+  if(max_depth > 0 && depth > max_depth)
+    return 0;
+
   DIR *dr;
-  struct dir_path *previous_dir = ctx->dirs;
-  struct dir_path *most_recent_dir = NULL;
-  if((dr = opendir(path))) {
+  if((dr = opendir(dir_path))) {
     struct dirent *de;
     while((de = readdir(dr)) != NULL) {
       if(!*de->d_name || !strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
         continue;
       char *tmp;
-      asprintf(&tmp, "%s%c%s", path, FILESLASH, de->d_name);
+      // on failure asprintf leaves tmp undefined, so normalize it to NULL
+      if(asprintf(&tmp, "%s%c%s", dir_path, FILESLASH, de->d_name) < 0)
+        tmp = NULL;
       if(!tmp)
-        fprintf(stderr, "Out of memory!\n"), ctx->err = 1;
+        fprintf(stderr, "Out of memory!\n"), err = 1;
       else {
-        struct stat s;
-        stat(tmp, &s);
-        if (s.st_mode & S_IFDIR) { // it's a dir. save for later
-          struct dir_path *dn = calloc(1, sizeof(*dn));
-          if(!dn)
-            fprintf(stderr, "Out of memory!\n"), ctx->err = 1;
-          else {
-            most_recent_dir = dn;
-            dn->path = tmp;
-            dn->next = ctx->dirs;
-            ctx->dirs = dn;
-          }
-        } else { // not a dir. try to remove
-          if(unlink(tmp)) {
-            perror(tmp); // "Unable to remove file");
-            // fprintf(stderr, "%s\n", tmp);
-            ctx->err = 1;
-          }
-          free(tmp);
-        }
+        struct zsv_foreach_dirent_handle h = { 0 };
+        h.verbose = verbose;
+        stat(tmp, (struct stat *)&h.stat);
+        h.parent = dir_path;
+        h.entry = de->d_name;
+        h.parent_and_entry = tmp;
+        h.ctx = ctx;
+        char is_dir = h.stat.st_mode & S_IFDIR ? 1 : 0;
+        h.is_dir = is_dir;
+        if(handler) {
+          // keep the first error: a later successful entry must not clear it
+          int rc = handler(&h, depth + 1);
+          if(rc && !err)
+            err = rc;
+        }
+
+        if(is_dir && !h.no_recurse) {
+          // recurse!
+          int rc = zsv_foreach_dirent_aux(tmp, depth + 1, max_depth, handler, ctx, verbose);
+          if(rc && !err)
+            err = rc;
+        }
+        free(tmp);
       }
     }
     closedir(dr);
   }
+  return err;
+}
 
-  // process all sub-dirs that we just collected
-  for(struct dir_path *dn = most_recent_dir; dn && dn != previous_dir; dn = dn->next)
-    remove_files_collect_dirs(ctx, dn->path);
+/**
+ * Call `handler` for each entry found under `dir_path`, starting the walk
+ * at depth 0
+ *
+ * @param dir_path  directory to walk
+ * @param max_depth maximum depth to descend to; 0 means no limit
+ * @param handler   callback invoked for each entry
+ * @param ctx       opaque pointer made available to the handler
+ * @param verbose   verbosity flag made available to the handler
+ * @return value returned by zsv_foreach_dirent_aux() (0 on success)
+ */
+int zsv_foreach_dirent(const char *dir_path,
+                       size_t max_depth,
+                       zsv_foreach_dirent_handler handler, void *ctx,
+                       char verbose
+                       ) {
+  return zsv_foreach_dirent_aux(dir_path, 0, max_depth, handler, ctx, verbose);
 }
 
 /**
  * Remove a directory and all of its contents
+ *
+ * @param path directory to remove
+ * @return 0 on success, non-zero on error
  */
 int zsv_remove_dir_recursive(const unsigned char *path) {
-  const char *cpath = (void *)path;
-  struct remove_dir_ctx ctx = { 0 };
   // we will delete all files first, then
   // delete directories in the reverse order we received them
-  remove_files_collect_dirs(&ctx, cpath);
-
+  struct dir_path *reverse_dirs = NULL;
+  int err = zsv_foreach_dirent((const char *)path, 0,
+                               zsv_foreach_dirent_remove, &reverse_dirs, 0);
   // unlink and free each dir
-  for(struct dir_path *next, *dn = ctx.dirs; dn; dn = next) {
+  // always walk the whole list so that every node is freed, even after an
+  // error; only attempt the rmdir while no error has occurred
+  for(struct dir_path *next, *dn = reverse_dirs; dn; dn = next) {
     next = dn->next;
-    rmdir_w_msg(dn->path, &ctx.err);
+    if(!err)
+      rmdir_w_msg(dn->path, &err);
     free(dn->path);
     free(dn);
   }
-  if(!ctx.err)
-    rmdir_w_msg(cpath, &ctx.err);
+  if(!err)
+    rmdir_w_msg((const char *)path, &err);
 
-  return ctx.err;
+  return err;
 }
+
+#include "dirs_to_json.c"
+
+#include "dirs_from_json.c"

+ 250 - 0
csv.mod/zsv/app/utils/dirs_from_json.c

@@ -0,0 +1,250 @@
+#include <yajl_helper.h>
+#include <zsv/utils/file.h>
+
+// State shared by the yajl callbacks used by zsv_dir_from_json()
+struct zsv_dir_from_json_ctx {
+  const char *filepath_prefix; // if non-NULL, prepended (with FILESLASH) to each top-level key to form the output path
+  unsigned char buff[4096];    // chunk of input most recently read and handed to yajl_parse()
+  size_t content_start;        // offset into buff of nested content not yet forwarded to the jq filter
+  FILE *out;                   // output file currently being written, if any
+  char *out_filepath;          // heap-allocated path of `out`; freed when `out` is closed
+  struct jv_to_json_ctx jctx;  // writer settings used by the jq filter
+  zsv_jq_handle zjq;           // jq filter used to pretty-print .json outputs; NULL for other files
+
+  int err;                     // non-zero if an error was recorded by a callback
+  unsigned char in_obj:1;      // set while inside a nested (level 2) object or array
+  unsigned char do_check:1;    // only check whether destination files already exist
+  unsigned char dry:1;         // only print the destination file names
+  unsigned char _:5;           // unused bits
+};
+
+/**
+ * Finish and release the jq filter (if any), then close the current output
+ * file (if any) and free its saved path
+ */
+static void zsv_dir_from_json_close_out(struct zsv_dir_from_json_ctx *ctx) {
+  if(ctx->zjq != NULL) {
+    zsv_jq_finish(ctx->zjq);
+    zsv_jq_delete(ctx->zjq);
+  }
+  ctx->zjq = NULL;
+
+  if(ctx->out == NULL)
+    return;
+
+  fclose(ctx->out);
+  free(ctx->out_filepath);
+  ctx->out = NULL;
+  ctx->out_filepath = NULL;
+}
+
+/**
+ * yajl map-key callback. Each non-empty key at level 1 names a destination
+ * file (relative to `filepath_prefix`, if set). Depending on the mode:
+ * - do_check: record EEXIST in ctx->err if the file already exists
+ * - dry:      print the file name
+ * - else:     create parent directories, open the file for writing and, for
+ *             names ending in ".json", set up a jq filter to pretty-print
+ *
+ * Any failure is recorded in ctx->err so that the import does not report
+ * success after silently skipping a file
+ *
+ * @return 1 (always), so that parsing continues
+ */
+static int zsv_dir_from_json_map_key(struct yajl_helper_parse_state *st,
+                               const unsigned char *s, size_t len) {
+  if(yajl_helper_level(st) == 1 && len) { // new property file entry
+    struct zsv_dir_from_json_ctx *ctx = yajl_helper_data(st);
+
+    char *fn = NULL;
+    int rc;
+    if(ctx->filepath_prefix)
+      rc = asprintf(&fn, "%s%c%.*s", ctx->filepath_prefix, FILESLASH, (int)len, s);
+    else
+      rc = asprintf(&fn, "%.*s", (int)len, s);
+    if(rc < 0) // on failure asprintf leaves fn undefined
+      fn = NULL;
+    if(!fn) {
+      ctx->err = errno = ENOMEM;
+      perror(NULL);
+    } else if(ctx->do_check) {
+      // we just want to check if the destination file exists
+      if(access(fn, F_OK) != -1) { // it exists
+        ctx->err = errno = EEXIST;
+        perror(fn);
+      }
+    } else if(ctx->dry) { // just output the name of the file
+      printf("%s\n", fn);
+    } else if(zsv_mkdirs(fn, 1)) {
+      ctx->err = 1;
+      fprintf(stderr, "Unable to create directories for %s\n", fn);
+    } else if(!((ctx->out = fopen(fn, "wb")))) {
+      ctx->err = errno ? errno : 1;
+      perror(fn);
+    } else {
+      ctx->out_filepath = fn;
+      fn = NULL;
+
+      // if it's a JSON file, use a jq filter to pretty-print
+      if(strlen(ctx->out_filepath) > 5 && !zsv_stricmp((const unsigned char *)ctx->out_filepath + strlen(ctx->out_filepath) - 5, (const unsigned char *)".json")) {
+        ctx->jctx.write1 = zsv_jq_fwrite1;
+        ctx->jctx.ctx = ctx->out;
+        ctx->jctx.flags = JV_PRINT_PRETTY | JV_PRINT_SPACE1;
+        enum zsv_jq_status jqstat;
+        ctx->zjq = zsv_jq_new((const unsigned char *)".", jv_to_json_func, &ctx->jctx, &jqstat);
+        if(!ctx->zjq) {
+          ctx->err = 1;
+          fprintf(stderr, "zsv_jq_new: unable to open for %s\n", ctx->out_filepath);
+          zsv_dir_from_json_close_out(ctx);
+        }
+      }
+    }
+    free(fn);
+  }
+  return 1;
+}
+
+/**
+ * Start-of-object/array callback. When a level-2 container begins, flag that
+ * we are inside one and note where its content starts: one byte before the
+ * number of bytes yajl reports as consumed
+ *
+ * @return 1 (always), so that parsing continues
+ */
+static int zsv_dir_from_json_start_obj(struct yajl_helper_parse_state *st) {
+  if(yajl_helper_level(st) != 2)
+    return 1;
+
+  struct zsv_dir_from_json_ctx *ctx = yajl_helper_data(st);
+  ctx->in_obj = 1;
+  ctx->content_start = yajl_get_bytes_consumed(st->yajl) - 1;
+  return 1;
+}
+
+/**
+ * If a jq filter is active, feed it the part of ctx->buff that lies between
+ * ctx->content_start and the number of bytes yajl reports as consumed, then
+ * reset content_start to 0
+ *
+ * @return 0 (always)
+ */
+static int zsv_dir_from_json_flush(yajl_handle yajl, struct zsv_dir_from_json_ctx *ctx) {
+  if(!ctx->zjq)
+    return 0;
+
+  size_t consumed = yajl_get_bytes_consumed(yajl);
+  if(consumed > ctx->content_start)
+    zsv_jq_parse(ctx->zjq, ctx->buff + ctx->content_start,
+                 consumed - ctx->content_start);
+  else
+    fprintf(stderr, "Error! zsv_dir_from_json_flush unexpected current position\n");
+  ctx->content_start = 0;
+  return 0;
+}
+
+/**
+ * End-of-object/array callback. Once the parser is back at level 1 (i.e. a
+ * level-2 container has just ended), forward any pending content to the jq
+ * filter, close the current output and clear the in-container flag
+ *
+ * @return 1 (always), so that parsing continues
+ */
+static int zsv_dir_from_json_end_obj(struct yajl_helper_parse_state *st) {
+  if(yajl_helper_level(st) != 1)
+    return 1;
+
+  struct zsv_dir_from_json_ctx *ctx = yajl_helper_data(st);
+  zsv_dir_from_json_flush(st->yajl, ctx);
+  zsv_dir_from_json_close_out(ctx);
+  ctx->in_obj = 0;
+  return 1;
+}
+
+/**
+ * Scalar-value callback. For a value at level 1 (i.e. the value of a
+ * top-level key):
+ * - if a jq filter is active, pass it the value re-encoded as a JSON string,
+ *   or `null` if the value is empty or could not be encoded
+ * - otherwise, write the raw value to the open output file, if any
+ * and then close the current output
+ *
+ * @return 1 (always), so that parsing continues
+ */
+static int zsv_dir_from_json_process_value(struct yajl_helper_parse_state *st,
+                                     struct json_value *value) {
+  if(yajl_helper_level(st) != 1)
+    return 1;
+
+  struct zsv_dir_from_json_ctx *ctx = yajl_helper_data(st);
+  const unsigned char *jsstr;
+  size_t len;
+  json_value_default_string(value, &jsstr, &len);
+  if(ctx->zjq) {
+    unsigned char *js = NULL;
+    if(len)
+      js = zsv_json_from_str_n(jsstr, len);
+    if(js) {
+      zsv_jq_parse(ctx->zjq, js, strlen((char *)js));
+      free(js);
+    } else
+      zsv_jq_parse(ctx->zjq, "null", 4);
+  } else if(ctx->out && len)
+    fwrite(jsstr, 1, len, ctx->out);
+  zsv_dir_from_json_close_out(ctx);
+  return 1;
+}
+
+/**
+ * Convert a JSON stream into file and directory contents
+ * This function is the inverse of zsv_dir_to_json()
+ * Input schema is a dictionary where key = path and value = contents
+ * Entries whose key ends with .json are written as pretty-printed JSON
+ * Other entries have their scalar value written as-is (do not try with large files)
+ *
+ * Unless ZSV_DIR_FLAG_FORCE is set and if the target directory exists, the
+ * input is first scanned to check that no destination file already exists;
+ * when the input is stdin, it is first saved to a temp file so that it can
+ * be read twice
+ *
+ * @param target_dir directory under which the files are created
+ * @param src        stream to read the JSON from
+ * @param flags      ZSV_DIR_FLAG_FORCE: overwrite existing files;
+ *                   ZSV_DIR_FLAG_DRY: only print the names of the files
+ * @param _verbose   unused
+ * @return 0 on success, non-zero on error
+ */
+int zsv_dir_from_json(const unsigned char *target_dir,
+                      FILE *src,
+                      unsigned int flags, // ZSV_DIR_FLAG_XX
+                      unsigned char _verbose
+                      ) {
+  (void)(_verbose);
+  int err   = 0;
+  int force = !!(flags & ZSV_DIR_FLAG_FORCE);
+  int dry   = !!(flags & ZSV_DIR_FLAG_DRY);
+  char *tmp_fn = NULL; // only used if force = 0 and src == stdin
+  if(!force) {
+    // if input is stdin, we'll need to read it twice, so save it first
+    // this isn't the most efficient way to do it, as it reads it 3 times
+    // but it's easier and the diff is immaterial
+    if(src == stdin) {
+      src = NULL;
+      tmp_fn = zsv_get_temp_filename("zsv_prop_XXXXXXXX");
+      FILE *tmp_f;
+      if(!tmp_fn) {
+        err = errno = ENOMEM;
+        perror(NULL);
+      } else if(!(tmp_f = fopen(tmp_fn, "wb"))) {
+        err = errno;
+        perror(tmp_fn);
+      } else {
+        err = zsv_copy_file_ptr(stdin, tmp_f);
+        fclose(tmp_f);
+        if(!(src = fopen(tmp_fn, "rb"))) {
+          err = errno;
+          perror(tmp_fn);
+        }
+      }
+    }
+  }
+
+  if(!err) {
+    // we will run this loop either once (force) or twice (no force):
+    // 1. check before running (no force)
+    // 2. do the import
+    char do_check = !force;
+    if(do_check && !zsv_dir_exists((const char *)target_dir))
+      do_check = 0;
+
+    for(int i = do_check ? 0 : 1; i < 2 && !err; i++) {
+      do_check = i == 0;
+
+      size_t bytes_read;
+      struct yajl_helper_parse_state st;
+      struct zsv_dir_from_json_ctx ctx = { 0 };
+      ctx.filepath_prefix = (const char *)target_dir;
+
+      int (*start_obj)(struct yajl_helper_parse_state *st) = NULL;
+      int (*end_obj)(struct yajl_helper_parse_state *st) = NULL;
+      int (*process_value)(struct yajl_helper_parse_state *, struct json_value *) = NULL;
+
+      if(do_check)
+        ctx.do_check = do_check;
+      else {
+        ctx.dry = dry;
+        if(!ctx.dry) {
+          start_obj = zsv_dir_from_json_start_obj;
+          end_obj = zsv_dir_from_json_end_obj;
+          process_value = zsv_dir_from_json_process_value;
+        }
+      }
+
+      if(yajl_helper_parse_state_init(&st, 32,
+                                      start_obj, end_obj, // map start/end
+                                      zsv_dir_from_json_map_key,
+                                      start_obj, end_obj, // array start/end
+                                      process_value,
+                                      &ctx) != yajl_status_ok) {
+        err = errno = ENOMEM;
+        perror(NULL);
+      } else {
+        while((bytes_read = fread(ctx.buff, 1, sizeof(ctx.buff), src)) > 0) {
+          if(yajl_parse(st.yajl, ctx.buff, bytes_read) != yajl_status_ok)
+            yajl_helper_print_err(st.yajl, ctx.buff, bytes_read);
+          if(ctx.in_obj)
+            zsv_dir_from_json_flush(st.yajl, &ctx);
+        }
+        if(yajl_complete_parse(st.yajl) != yajl_status_ok)
+          yajl_helper_print_err(st.yajl, ctx.buff, bytes_read);
+
+        // e.g. if bad JSON and parse failed: release the jq filter as well
+        // as the output file and its saved path
+        zsv_dir_from_json_close_out(&ctx);
+      }
+      yajl_helper_parse_state_free(&st);
+
+      if(ctx.err)
+        err = ctx.err;
+      if(i == 0) {
+        // rewind() only reports failure via errno, which may still hold a
+        // stale value (e.g. ENOENT from the access() calls of the check
+        // pass), so it must be cleared first
+        errno = 0;
+        rewind(src);
+        if(errno) {
+          err = errno;
+          perror(NULL);
+        }
+      }
+    }
+  }
+  if(tmp_fn) {
+    unlink(tmp_fn);
+    free(tmp_fn);
+  }
+
+  return err;
+}

+ 118 - 0
csv.mod/zsv/app/utils/dirs_to_json.c

@@ -0,0 +1,118 @@
+// State passed to zsv_dir_foreach_to_json() while exporting a directory
+struct zsv_dir_foreach_to_json_ctx {
+  struct zsv_dir_filter zsv_dir_filter; // copy of the caller's filter; decides which entries are exported
+  const unsigned char *parent_dir;      // directory being exported; its length is used to strip the prefix from each key
+  struct jv_to_json_ctx jctx;           // writer settings used by the jq filter
+  zsv_jq_handle zjq;                    // jq filter through which all output is written
+  unsigned count; // number of files exported so far
+  int err;                              // non-zero once writing to the jq filter has failed
+};
+
+/**
+ * zsv_foreach_dirent() handler used by zsv_dir_to_json(). For each entry that
+ * passes the caller's filter:
+ * - a directory is left for the walker to recurse into
+ * - a .json file is emitted as `"relative/path": <file contents>`
+ * - a .txt file is emitted as `"relative/path": "<up to 4096 bytes>"`,
+ *   or `null` if the file is empty or its contents could not be encoded
+ * - any other file is ignored
+ * Entries rejected by the filter are skipped, and not recursed into
+ *
+ * While the filter runs, h->ctx temporarily points to the filter's own ctx
+ *
+ * @return 0 (always); errors are recorded in ctx->err
+ */
+static int zsv_dir_foreach_to_json(struct zsv_foreach_dirent_handle *h, size_t depth) {
+  struct zsv_dir_foreach_to_json_ctx *ctx = h->ctx;
+  h->ctx = ctx->zsv_dir_filter.ctx;
+  if(!ctx->zsv_dir_filter.filter(h, depth))
+    h->no_recurse = 1; // skip this node (only matters if node is dir)
+  else if(!h->is_dir) { // process this file
+    char suffix = 0;
+    if(strlen(h->parent_and_entry) > 5 && !zsv_stricmp((const unsigned char *)h->parent_and_entry + strlen(h->parent_and_entry) - 5, (const unsigned char *)".json"))
+      suffix = 'j'; // json
+    else if(strlen(h->parent_and_entry) > 4 && !zsv_stricmp((const unsigned char *)h->parent_and_entry + strlen(h->parent_and_entry) - 4, (const unsigned char *)".txt"))
+      suffix = 't'; // text
+    if(suffix) {
+      // for now, only handle json or txt
+      FILE *f = fopen(h->parent_and_entry, "rb");
+      if(!f)
+        perror(h->parent_and_entry);
+      else {
+        // create an entry for this file. the map key is the file name; its value is the file contents
+        unsigned char *js = zsv_json_from_str((const unsigned char *)h->parent_and_entry + strlen((const char *)ctx->parent_dir) + 1);
+        if(!js)
+          errno = ENOMEM, perror(NULL);
+        else if(*js) {
+          if(ctx->count > 0)
+            if(zsv_jq_parse(ctx->zjq, ",", 1))
+              ctx->err = 1;
+          if(!ctx->err) {
+            ctx->count++;
+            if(zsv_jq_parse(ctx->zjq, js, strlen((const char *)js)) || zsv_jq_parse(ctx->zjq, ":", 1))
+              ctx->err = 1;
+            else {
+              switch(suffix) {
+              case 'j': // json
+                if(zsv_jq_parse_file(ctx->zjq, f))
+                  ctx->err = 1;
+                break;
+              case 't': // txt
+                // for now we are going to limit txt file values to 4096 chars and JSON-stringify it
+                {
+                  unsigned char buff[4096];
+                  size_t n = fread(buff, 1, sizeof(buff), f);
+                  // the key has already been written, so a value must always
+                  // follow it (else the output is malformed): use null when
+                  // the file is empty or its contents could not be encoded
+                  unsigned char *txt_js = n ? zsv_json_from_str_n(buff, n) : NULL;
+                  if(zsv_jq_parse(ctx->zjq, txt_js ? txt_js : (const unsigned char *)"null", txt_js ? strlen((const char *)txt_js) : 4))
+                    ctx->err = 1;
+                  free(txt_js);
+                }
+                break;
+              }
+            }
+          }
+        }
+        free(js);
+        fclose(f);
+      }
+    }
+  }
+  h->ctx = ctx;
+  return 0;
+}
+
+/**
+ * Convert directory contents into a single JSON file
+ * Output schema is a dictionary where key = path and value = contents
+ * Files named with .json suffix will be exported as JSON (content must be valid JSON)
+ * Files named with .txt suffix will be exported as a single string value (do not try with large files)
+ * Files named with any other suffix are not exported
+ *
+ * @param parent_dir      : directory to export
+ * @param output_filename : file path to output to, or NULL to output to stdout
+ * @param zsv_dir_filter  : filter that decides which entries are exported, and
+ *                          the maximum depth to descend to
+ * @param verbose         : verbosity flag passed on to the directory walker
+ * @return 0 on success, non-zero on error
+ */
+int zsv_dir_to_json(const unsigned char *parent_dir,
+                    const unsigned char *output_filename,
+                    struct zsv_dir_filter *zsv_dir_filter,
+                    unsigned char verbose
+                    ) {
+  int err = 0;
+  FILE *fdest = output_filename ? fopen((const char *)output_filename, "wb") : stdout;
+  if(!fdest)
+    err = errno, perror((const char *)output_filename);
+  else {
+    struct zsv_dir_foreach_to_json_ctx ctx = { 0 };
+    ctx.zsv_dir_filter = *zsv_dir_filter;
+    ctx.parent_dir = parent_dir;
+
+    // use a jq filter to pretty-print
+    ctx.jctx.write1 = zsv_jq_fwrite1;
+    ctx.jctx.ctx = fdest;
+    ctx.jctx.flags = JV_PRINT_PRETTY | JV_PRINT_SPACE1;
+    enum zsv_jq_status jqstat;
+    ctx.zjq = zsv_jq_new((const unsigned char *)".", jv_to_json_func, &ctx.jctx, &jqstat);
+    if(!ctx.zjq)
+      err = 1, fprintf(stderr, "zsv_jq_new\n");
+    else {
+      if(jqstat == zsv_jq_status_ok && zsv_jq_parse(ctx.zjq, "{", 1) == zsv_jq_status_ok) {
+        // export each file
+        zsv_foreach_dirent((const char *)parent_dir, ctx.zsv_dir_filter.max_depth, zsv_dir_foreach_to_json,
+                           &ctx, verbose);
+        if(!ctx.err && zsv_jq_parse(ctx.zjq, "}", 1))
+          ctx.err = 1;
+        if(!ctx.err && zsv_jq_finish(ctx.zjq))
+          ctx.err = 1;
+      } else
+        ctx.err = 1; // unable to start the output object: do not report success
+      // release the jq filter whether or not the export succeeded
+      zsv_jq_delete(ctx.zjq);
+      err = ctx.err;
+    }
+    // stdout belongs to the caller: flush it, but only close files we opened
+    if(fdest == stdout)
+      fflush(fdest);
+    else
+      fclose(fdest);
+  }
+  return err;
+}

+ 95 - 17
csv.mod/zsv/app/utils/file.c

@@ -13,6 +13,10 @@
 #include <unistd.h> // for close()
 #include <fcntl.h> // open
 
+#include <zsv/utils/dirs.h>
+#include <zsv/utils/file.h>
+
+
 #if defined(_WIN32) || defined(WIN32) || defined(WIN)
 #include <windows.h>
 
@@ -33,16 +37,22 @@ char *zsv_get_temp_filename(const char *prefix) {
 }
 #else
 
+/**
+ * Get a temp file name. The returned value, if any, will have been allocated
+ * on the heap, and the caller should `free()`
+ *
+ * @param prefix string with which the resulting file name will be prefixed
+ */
 char *zsv_get_temp_filename(const char *prefix) {
   char *s = NULL;
   char *tmpdir = getenv("TMPDIR");
   if(!tmpdir)
     tmpdir = ".";
   asprintf(&s, "%s/%s_XXXXXXXX", tmpdir, prefix);
-#ifndef NDEBUG
-  fprintf(stderr, "creating temp file: %s\n", s ? s : "Out of memory!");
-#endif
-  if(s) {
+  if(!s) {
+    const char *msg = strerror(errno);
+    fprintf(stderr, "%s%c%s: %s\n", tmpdir, FILESLASH, prefix, msg ? msg : "Unknown error");
+  } else {
     int fd = mkstemp(s);
     if(fd > 0) {
       close(fd);
@@ -91,6 +101,85 @@ void zsv_redirect_file_from_temp(FILE *f, int bak, int old_fd) {
   close(bak);
 }
 
+/**
+ * Check whether a path exists and is not a directory
+ *
+ * @param filename path to check
+ * @return 1 if the path exists and is not a directory, else 0
+ */
+#if defined(_WIN32) || defined(WIN32) || defined(WIN)
+int zsv_file_exists(const char* filename) {
+  const DWORD attrs = GetFileAttributes(filename);
+  if(attrs == INVALID_FILE_ATTRIBUTES)
+    return 0;
+  return (attrs & FILE_ATTRIBUTE_DIRECTORY) ? 0 : 1;
+}
+#else
+# include <sys/stat.h> // S_IRUSR S_IWUSR
+
+int zsv_file_exists(const char* filename) {
+  struct stat st;
+  if(stat(filename, &st) != 0)
+    return 0;
+  return (st.st_mode & S_IFDIR) ? 0 : 1;
+}
+#endif
+
+/**
+ * Copy a file, given source and destination paths
+ * Any directories needed for the destination path are created first
+ * On error, output error message and return non-zero
+ *
+ * @param src  path of the file to copy
+ * @param dest path of the copy to create (overwritten if it exists)
+ * @return 0 on success, non-zero on error
+ */
+int zsv_copy_file(const char *src, const char *dest) {
+  // create one or more directories if needed
+  if(zsv_mkdirs(dest, 1)) {
+    fprintf(stderr, "Unable to create directories needed for %s\n", dest);
+    return -1;
+  }
+
+  // copy the file
+  int err = 0;
+  FILE *fsrc = fopen(src, "rb");
+  if(!fsrc)
+    err = errno ? errno : -1, perror(src);
+  else {
+    FILE *fdest = fopen(dest, "wb");
+    if(!fdest)
+      err = errno ? errno : -1, perror(dest);
+    else {
+      err = zsv_copy_file_ptr(fsrc, fdest);
+      if(err)
+        perror(dest);
+      // buffered data may only reach the file when it is closed, so a
+      // failed close means the copy is incomplete
+      if(fclose(fdest) && !err) {
+        err = errno ? errno : -1;
+        perror(dest);
+      }
+    }
+    fclose(fsrc);
+  }
+  return err;
+}
+
+/**
+ * Copy a file, given source and destination FILE pointers
+ * Data is copied from the current position of `src` until EOF; neither
+ * stream is closed
+ * Return error number per errno.h (or -1 if errno is not set); 0 on success
+ */
+int zsv_copy_file_ptr(FILE *src, FILE *dest) {
+  int err = 0;
+  char buffer[4096];
+  size_t bytes_read;
+  while((bytes_read = fread(buffer, 1, sizeof(buffer), src)) > 0) {
+    if(fwrite(buffer, 1, bytes_read, dest) != bytes_read) {
+      err = errno ? errno : -1;
+      break;
+    }
+  }
+  // fread() returns 0 both at EOF and on a read error: tell them apart so
+  // that a truncated copy is not reported as a success
+  if(!err && ferror(src))
+    err = errno ? errno : -1;
+  return err;
+}
+
+/**
+ * Split a file path at its last '/' or '\\' separator
+ *
+ * @param filepath path to split
+ * @param basename set to the part of `filepath` that follows the last
+ *                 separator, or to `filepath` itself if there is none
+ * @return length of the directory part, excluding the separator (0 if none)
+ */
+size_t zsv_dir_len_basename(const char *filepath, const char **basename) {
+  size_t pos = strlen(filepath);
+  while(pos > 0) {
+    const char c = filepath[pos - 1];
+    if(c == '/' || c == '\\') {
+      *basename = filepath + pos;
+      return pos - 1;
+    }
+    pos--;
+  }
+
+  *basename = filepath;
+  return 0;
+}
+
 int zsv_file_readable(const char *filename, int *err, FILE **f_out) {
   FILE *f;
   int rc;
@@ -101,19 +190,8 @@ int zsv_file_readable(const char *filename, int *err, FILE **f_out) {
     rc = 0;
     if(err)
       *err = errno;
-    else switch(errno) {
-      case ENOENT:
-	fprintf(stderr, "File '%s' not found\n", filename);
-	break;
-      case EACCES:
-	fprintf(stderr, "No permissions to read '%s'\n", filename);
-	break;
-      case EISDIR:
-	fprintf(stderr, "File '%s' is a directory\n", filename);
-	break;
-      default:
-	fprintf(stderr, "Unknown error opening '%s'\n", filename);
-      }
+    else
+      perror(filename);
   } else {
     rc = 1;
     if(f_out)

+ 2 - 0
csv.mod/zsv/data/fixed-auto2.txt

@@ -0,0 +1,2 @@
+originator              loanid           origdate             lifecap           lifefloor            teaser          prod
+Retail                        1             8/2/2005                18                  3                 0          HELOC

+ 2 - 0
csv.mod/zsv/data/fixed-auto3.txt

@@ -0,0 +1,2 @@
+originator              loanid           origdate             lifecap           lifefloor            teaser          prodtype
+Retail                        1             8/2/2005                18                  3                 0          HELOC

+ 2 - 0
csv.mod/zsv/data/test/pretty-escape.csv

@@ -0,0 +1,2 @@
+hi,t|\here
+how|xx,are

+ 1 - 1
csv.mod/zsv/docs/compare.md

@@ -181,7 +181,7 @@ to get:
 
 No rigorous benchmarking has yet been performed, but preliminary testing yields reasonable performance and memory usage.
 
-Running a comparison of two 40MB CSV files, each a table of 100,000 rows with 61 columns, containing approximately
+Using a 2019 MBA, running a comparison of two 40MB CSV files, each a table of 100,000 rows with 61 columns, containing approximately
 60,000 differences, took about 5.5 seconds and used a maximum of about 1.8MB of RAM.
 
 The same test with sorting used significantly more memory (up to ~40MB) and took about 7.8 seconds to complete.

+ 4 - 1
csv.mod/zsv/examples/lib/Makefile

@@ -72,7 +72,10 @@ help:
 
 build: simple print_my_column parse_by_chunk pull
 
-test: test-eol
+test: test-eol test-tiny
+
+test-tiny: build/simple${EXE}
+	@[ "`echo '' | $< - 2>&1`" = "" ] && ${TEST_PASS} || ${TEST_FAIL}
 
 test-eol: test-eol-1 test-eol-2 test-eol-3 test-eol-4
 

+ 2 - 0
csv.mod/zsv/include/zsv/common.h

@@ -197,6 +197,8 @@ struct zsv_opts {
    * if the actual data does not have a header row with column names, the caller
    * should provide one (in CSV format) which will be treated as if it was the
    * first row of data
+   *
+   * cli option: -0,--header-row
    */
   const char *insert_header_row;
 

+ 0 - 1
csv.mod/zsv/include/zsv/utils/cache.h

@@ -33,7 +33,6 @@ unsigned char *zsv_cache_path(const unsigned char *data_filepath,
                               const unsigned char *cache_filename,
                               char temp_file);
 
-
 enum zsv_cache_type {
   zsv_cache_type_property = 1,
   zsv_cache_type_tag

+ 3 - 1
csv.mod/zsv/include/zsv/utils/db.h

@@ -12,6 +12,8 @@
 #include <sqlite3.h>
 #include <jsonwriter.h>
 
-int zsv_dbtable2json(sqlite3 *db, const char *tname, jsonwriter_handle jsw);
+// zsv_dbtable2json: convert a db table to json
+// @limit: if nonzero, limit number of rows processed
+int zsv_dbtable2json(sqlite3 *db, const char *tname, jsonwriter_handle jsw, size_t limit);
 
 #endif

+ 83 - 0
csv.mod/zsv/include/zsv/utils/dirs.h

@@ -9,6 +9,8 @@
 #ifndef ZSV_DIRS_H
 #define ZSV_DIRS_H
 
+#include <stdio.h>
+
 /* Maximum length of file name */
 #if !defined(FILENAME_MAX)
 #   define FILENAME_MAX MAX_PATH
@@ -53,4 +55,85 @@ int zsv_mkdirs(const char *path, char path_is_filename);
  */
 int zsv_remove_dir_recursive(const unsigned char *path);
 
+#include <sys/stat.h>
+
+struct zsv_foreach_dirent_handle { /* describes one directory entry; passed to a zsv_foreach_dirent_handler */
+  const char *parent;           /* name of the parent directory */
+  const char *entry;            /* file / dir name of current entry being processed */
+  const char *parent_and_entry; /* parent + entry separated by file separator */
+  const struct stat stat;       /* stat of current entry */
+
+  void *ctx;                    /* caller-provided context to pass to handler */
+
+  unsigned char verbose:1;      /* non-zero for verbose output */
+  unsigned char is_dir:1;       /* non-zero if this entry is a directory */
+  unsigned char no_recurse:1;        /* set to 1 when handling a dir to prevent recursing into it */
+  unsigned char _:5;            /* remaining 5 bits of the flag byte; presumably unused padding */
+};
+
+typedef int (*zsv_foreach_dirent_handler)(struct zsv_foreach_dirent_handle *h, size_t depth);
+
+/**
+ * Recursively process entries (files and folders) in a directory
+ *
+ * @param dir_path    : path of directory to begin processing children of
+ * @param max_depth   : maximum depth to recurse, or 0 for no maximum
+ * @param handler     : caller-provided entry handler. return 0 on success, non-zero on error
+ * @param ctx         : pointer passed to the handler
+ * @param verbose     : non-zero for verbose output
+ *
+ * returns error
+ */
+int zsv_foreach_dirent(const char *dir_path,
+                       size_t max_depth,
+                       zsv_foreach_dirent_handler handler,
+                       void *ctx,
+                       char verbose
+                       );
+
+struct zsv_dir_filter { /* selects which directory entries get processed, e.g. by zsv_dir_to_json() */
+  zsv_foreach_dirent_handler filter; /* filter function; return 1 to process this node. if node is dir, return 0 to skip entirely */
+  size_t max_depth;                  /* max depth to recurse */
+  void *ctx;                         /* pointer to pass to filter function */
+};
+
+/**
+ * Convert directory contents into a single JSON file
+ * Output schema is a dictionary where key = path and value = contents
+ * Files named with .json suffix will be exported as JSON (content must be valid JSON)
+ * Files named with any other suffix will be exported as a single string value (do not try with large files)
+ *
+ * @param parent_dir      : directory to export
+ * @param output_filename : file path to output to, or NULL to output to stdout
+ * @param file_filter     : filter determining which files to export
+ */
+int zsv_dir_to_json(const unsigned char *parent_dir,
+                    const unsigned char *output_filename,
+                    struct zsv_dir_filter *file_filter,
+                    unsigned char verbose
+                    );
+
+/**
+ * Convert a JSON stream into file and directory contents
+ * This function is the inverse of zsv_dir_to_json()
+ * Input schema is a dictionary where key = path and value = contents
+ * Files named with .json suffix will be imported as JSON (content must be valid JSON)
+ * Files named with any other suffix will be imported from a single string value (do not try with large files)
+ *
+ * @param target_dir : directory to create / import into
+ * @param fsrc       : stream to read the JSON input from
+ * @param flags      : ZSV_DIR_FLAG_xxx values as defined below
+ *
+ * Returns 0 on success, non-zero on error
+ */
+
+#define ZSV_DIR_FLAG_FORCE 1 /* overwrite target files if they already exist */
+#define ZSV_DIR_FLAG_DRY   2 /* dry run, output names of files that would be created/overwritten */
+
+int zsv_dir_from_json(const unsigned char *target_dir,
+                      FILE *fsrc,
+                      unsigned int flags, // ZSV_DIR_FLAG_XX
+                      unsigned char verbose
+                      );
+
 #endif

+ 27 - 1
csv.mod/zsv/include/zsv/utils/file.h

@@ -20,12 +20,20 @@
 #endif // LINEEND
 /**
  * Get a temp file name. The returned value, if any, will have been allocated
- * on the stack, and the caller should `free()`
+ * on the heap, and the caller should `free()`
  *
  * @param prefix string with which the resulting file name will be prefixed
  */
 char *zsv_get_temp_filename(const char *prefix);
 
+/**
+ * Check if a file exists and is readable (with fopen + "rb")
+ *
+ * @param filename
+ * @returns: true  (1) if file exists
+ */
+int zsv_file_exists(const char* filename);
+
 /**
  * Check if a file exists and is readable (with fopen + "rb")
  *
@@ -46,5 +54,23 @@ int zsv_file_readable(const char *filename, int *err, FILE **f_out);
  */
 size_t zsv_filter_write(void *FILEp, unsigned char *buff, size_t bytes_read);
 
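+/**
+ * Get a file path's directory length and base name
+ *
+ * @param filepath : path to split at its last separator (slash or backslash)
+ * @param basename : set to point, within filepath, at the base name portion
+ * @returns the length of the directory portion of the path, not counting
+ *          the separator itself, or 0 if the path contains no separator
+ */
+size_t zsv_dir_len_basename(const char *filepath, const char **basename);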
+
+/**
+ * Copy a file. Create any needed directories
+ * On error, prints error message and returns non-zero
+ */
+int zsv_copy_file(const char *src, const char *dest);
+
+/**
+ * Copy a file, given source and destination FILE pointers
+ * Return error number per errno.h
+ */
+int zsv_copy_file_ptr(FILE *src, FILE *dest);
 
 #endif

+ 27 - 0
csv.mod/zsv/include/zsv/utils/prop.h

@@ -67,4 +67,31 @@ enum zsv_status zsv_new_with_properties(struct zsv_opts *opts,
                                         const char *opts_used,
                                         zsv_parser *handle_out
                                         );
+
+/**
+ * If you are building your own CLI and incorporating zsv CLI commands into it,
+ * the `prop` command can be customized by providing your own function
+ * for determining whether a file in the property cache is a property file,
+ * which can be set using zsv_prop_get_or_set_is_prop_file()
+ *
+ * @param is_prop_file: your function, that returns non-zero if the given file entry
+ *                             is a property file. If NULL, is set to zsv_is_prop_file
+ * @param max_depth   : maximum depth of any property file. if is_prop_file was NULL,
+ *                      max_depth is set to 1
+ */
+#include "dirs.h"
+
+struct zsv_dir_filter; /* opaque structure for internal use */
+struct zsv_dir_filter *
+zsv_prop_get_or_set_is_prop_file(
+                                 int (*is_prop_file)(struct zsv_foreach_dirent_handle *, size_t),
+                                 int max_depth,
+                                 char set
+                                 );
+
+/**
+ * If you provide your own is_prop_file() function and you also want to include any
+ * zsv property file, your is_prop_file() can call zsv_is_prop_file()
+ */
+int zsv_is_prop_file(struct zsv_foreach_dirent_handle *h, size_t depth);
 #endif

+ 31 - 13
csv.mod/zsv/src/zsv.c

@@ -116,22 +116,31 @@ inline static size_t scanner_pre_parse(struct zsv_scanner *scanner) {
   return capacity;
 }
 
+/**
+ * apply --header-row option
+ *
+ * Copies scanner->insert_string into the scanner's buffer at offset
+ * partial_row_length, makes sure buff[len] holds a '\n', passes the first
+ * len + 1 bytes of the buffer to zsv_scan(), then clears insert_string so
+ * the insertion is not repeated
+ *
+ * Returns the status returned by zsv_scan()
+ *
+ * NOTE(review): the copy starts at offset partial_row_length, but the newline
+ * position and the scanned length are both measured from offset 0; these only
+ * agree when partial_row_length is 0 -- presumably always the case when this
+ * is called; confirm against callers
+ * NOTE(review): an over-long string is truncated to buff.size - 1 bytes
+ * regardless of partial_row_length, and a string of exactly buff.size bytes
+ * is not truncated at all -- confirm the buffer has room for the extra '\n'
+ */
+static enum zsv_status zsv_insert_string(struct zsv_scanner *scanner) {
+  // to do: replace below with
+  // return parse_bytes(scanner, bytes, len);
+  size_t len = strlen(scanner->insert_string);
+  if(len > scanner->buff.size - scanner->partial_row_length)
+    len = scanner->buff.size - 1; // to do: throw an error instead
+  memcpy(scanner->buff.buff + scanner->partial_row_length, scanner->insert_string, len);
+  if(scanner->buff.buff[len] != '\n')
+    scanner->buff.buff[len] = '\n';
+  enum zsv_status stat = zsv_scan(scanner, scanner->buff.buff, len + 1);
+  scanner->insert_string = NULL;
+  return stat;
+}
+
 /**
  * Read the next chunk of data from our input stream and parse it, calling our
  * custom handlers as each cell and row are parsed
  */
 ZSV_EXPORT
 enum zsv_status zsv_parse_more(struct zsv_scanner *scanner) {
-  if(scanner->insert_string) {
-    size_t len = strlen(scanner->insert_string);
-    if(len > scanner->buff.size - scanner->partial_row_length)
-      len = scanner->buff.size - 1; // to do: throw an error instead
-    memcpy(scanner->buff.buff + scanner->partial_row_length, scanner->insert_string, len);
-    if(scanner->buff.buff[len] != '\n')
-      scanner->buff.buff[len] = '\n';
-    zsv_scan(scanner, scanner->buff.buff, len + 1);
-    scanner->insert_string = NULL;
-  }
+  if(VERY_UNLIKELY(scanner->insert_string != NULL))
+    zsv_insert_string(scanner);
 
   size_t capacity = scanner_pre_parse(scanner);
   size_t bytes_read;
@@ -143,13 +152,16 @@ enum zsv_status zsv_parse_more(struct zsv_scanner *scanner) {
 #endif
     size_t bom_len = strlen(ZSV_BOM);
     scanner->checked_bom = 1;
-    if(scanner->read(scanner->buff.buff, 1, bom_len, scanner->in) == bom_len
+    if((bytes_read = scanner->read(scanner->buff.buff, 1, bom_len, scanner->in)) == bom_len
        && !memcmp(scanner->buff.buff, ZSV_BOM, bom_len)) {
       // have bom. disregard what we just read
       bytes_read = scanner->read(scanner->buff.buff, 1, capacity, scanner->in);
       scanner->had_bom = 1;
-    } else // no BOM. keep the bytes we just read
-      bytes_read = bom_len + scanner->read(scanner->buff.buff + bom_len, 1, capacity - bom_len, scanner->in);
+    } else { // no BOM. keep the bytes we just read
+      // bytes_read = bom_len + scanner->read(scanner->buff.buff + bom_len, 1, capacity - bom_len, scanner->in);
+      if(bytes_read == bom_len) // maybe we only read < 3 bytes
+        bytes_read += scanner->read(scanner->buff.buff + bom_len, 1, capacity - bom_len, scanner->in);
+    }
   } else // already checked bom. read as usual
     bytes_read = scanner->read(scanner->buff.buff + scanner->partial_row_length, 1,
                                capacity, scanner->in);
@@ -194,6 +206,10 @@ enum zsv_status zsv_next_row(zsv_parser parser) {
     parser->mode = ZSV_MODE_DELIM_PULL;
     zsv_set_row_handler(parser, zsv_pull_row);
     zsv_set_context(parser, parser);
+    if(parser->insert_string != NULL)
+      parser->pull.stat = zsv_insert_string(parser);
+    if(parser->pull.stat == zsv_status_row)
+      return parser->pull.stat;
   }
   if(VERY_LIKELY(parser->pull.stat == zsv_status_row))
     parser->pull.stat = zsv_scan_delim_pull(parser, parser->pull.buff, parser->pull.bytes_read);
@@ -364,6 +380,8 @@ enum zsv_status zsv_finish(struct zsv_scanner *scanner) {
     return zsv_status_error;
   if(!scanner->abort) {
     if(scanner->mode == ZSV_MODE_FIXED) {
+      if(scanner->partial_row_length && memchr("\n\r", scanner->buff.buff[scanner->partial_row_length-1], 2))
+        scanner->partial_row_length--;
       if(scanner->partial_row_length)
         return row_fx(scanner, scanner->buff.buff, 0, scanner->partial_row_length);
       return zsv_status_ok;