瀏覽代碼

Updated to zsv 0.3.8.93d349e (#23)

Brucey 1 年之前
父節點
當前提交
fb70798b6d
共有 56 個文件被更改,包括 1673 次插入和 366 次删除
  1. 3 1
      csv.mod/csv.bmx
  2. 342 0
      csv.mod/zsv/.github/workflows/ci.yml
  3. 19 29
      csv.mod/zsv/Makefile
  4. 5 3
      csv.mod/zsv/README.md
  5. 22 6
      csv.mod/zsv/app/2db.c
  6. 2 2
      csv.mod/zsv/app/2json.c
  7. 2 2
      csv.mod/zsv/app/2tsv.c
  8. 42 15
      csv.mod/zsv/app/Makefile
  9. 11 4
      csv.mod/zsv/app/cli.c
  10. 86 26
      csv.mod/zsv/app/compare.c
  11. 3 1
      csv.mod/zsv/app/compare.h
  12. 7 11
      csv.mod/zsv/app/compare_internal.h
  13. 5 29
      csv.mod/zsv/app/compare_sort.c
  14. 2 2
      csv.mod/zsv/app/count-pull.c
  15. 2 2
      csv.mod/zsv/app/count.c
  16. 4 3
      csv.mod/zsv/app/desc.c
  17. 29 5
      csv.mod/zsv/app/echo.c
  18. 2 2
      csv.mod/zsv/app/flatten.c
  19. 172 0
      csv.mod/zsv/app/paste.c
  20. 4 3
      csv.mod/zsv/app/pretty.c
  21. 3 4
      csv.mod/zsv/app/prop.c
  22. 103 0
      csv.mod/zsv/app/schema/database-table.json
  23. 45 14
      csv.mod/zsv/app/select-pull.c
  24. 45 14
      csv.mod/zsv/app/select.c
  25. 30 17
      csv.mod/zsv/app/serialize.c
  26. 54 14
      csv.mod/zsv/app/sql.c
  27. 4 3
      csv.mod/zsv/app/stack.c
  28. 54 8
      csv.mod/zsv/app/test/Makefile
  29. 2 2
      csv.mod/zsv/app/test/expected/test-compare.out5
  30. 6 0
      csv.mod/zsv/app/test/expected/test-compare.out8
  31. 5 0
      csv.mod/zsv/app/test/expected/test-compare.out9
  32. 1 0
      csv.mod/zsv/app/utils/arg.c
  33. 10 2
      csv.mod/zsv/app/utils/cache.c
  34. 2 0
      csv.mod/zsv/app/utils/dirs-no-jq.c
  35. 7 3
      csv.mod/zsv/app/utils/dirs.c
  36. 5 1
      csv.mod/zsv/app/utils/dirs_from_json.c
  37. 6 7
      csv.mod/zsv/app/utils/file.c
  38. 19 3
      csv.mod/zsv/app/utils/os.c
  39. 122 33
      csv.mod/zsv/app/utils/prop.c
  40. 6 0
      csv.mod/zsv/app/utils/string.c
  41. 41 16
      csv.mod/zsv/app/utils/writer.c
  42. 1 1
      csv.mod/zsv/app/zsv_command_standalone.c
  43. 3 1
      csv.mod/zsv/app/zsv_main.h
  44. 6 0
      csv.mod/zsv/data/loans_1-overwrite.csv
  45. 26 0
      csv.mod/zsv/include/zsv/common.h
  46. 6 0
      csv.mod/zsv/include/zsv/utils/dirs.h
  47. 2 1
      csv.mod/zsv/include/zsv/utils/os.h
  48. 37 8
      csv.mod/zsv/include/zsv/utils/prop.h
  49. 9 1
      csv.mod/zsv/include/zsv/utils/writer.h
  50. 3 0
      csv.mod/zsv/scripts/ci-freebsd-setup.sh
  51. 8 7
      csv.mod/zsv/src/Makefile
  52. 23 48
      csv.mod/zsv/src/zsv.c
  53. 161 9
      csv.mod/zsv/src/zsv_internal.c
  54. 4 2
      csv.mod/zsv/src/zsv_scan_delim.c
  55. 1 1
      csv.mod/zsv/src/zsv_scan_fixed.c
  56. 49 0
      csv.mod/zsv/src/zsv_strencode.c

+ 3 - 1
csv.mod/csv.bmx

@@ -25,12 +25,14 @@ bbdoc: A CSV parser.
 End Rem
 Module Text.CSV
 
-ModuleInfo "Version: 1.02"
+ModuleInfo "Version: 1.04"
 ModuleInfo "Author: Bruce A Henderson"
 ModuleInfo "License: MIT"
 ModuleInfo "zsv - Copyright (c) 2021 Guarnerix Inc dba Liquidaty"
 ModuleInfo "Copyright: 2022-2023 Bruce A Henderson"
 
+ModuleInfo "History: 1.04"
+ModuleInfo "History: Updated to zsv 0.3.8.93d349e"
 ModuleInfo "History: 1.03"
 ModuleInfo "History: Updated to zsv 0.3.6.fdcd18e"
 ModuleInfo "History: 1.02"

+ 342 - 0
csv.mod/zsv/.github/workflows/ci.yml

@@ -0,0 +1,342 @@
+name: ci
+
+on:
+  push:
+    branches: [main]
+    paths-ignore:
+    - "**.md"
+  pull_request:
+    branches: [main]
+    paths-ignore:
+    - "**.md"
+  release:
+    types: [published]
+
+jobs:
+  ci:
+    name: ci
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, macos-12]
+
+    runs-on: ${{ matrix.os }}
+
+    env:
+      TAG: "0.3.6"
+      AMD64_LINUX_GCC: amd64-linux-gcc
+      AMD64_LINUX_CLANG: amd64-linux-clang
+      AMD64_WINDOWS_MINGW: amd64-windows-mingw
+      AMD64_MACOSX_GCC: amd64-macosx-gcc
+      AMD64_FREEBSD_GCC: amd64-freebsd-gcc
+      ARTIFACT_DIR: .artifacts
+      ARTIFACT_RETENTION_DAYS: 5
+
+    steps:
+    - name: Get tag if tagged/released and set TAG env var
+      if: startsWith(github.ref, 'refs/tags/v')
+      shell: bash
+      run: |
+        TAG=$(echo $GITHUB_REF | cut -d '/' -f3)
+        echo "TAG: $TAG"
+        if [[ $TAG == "v"* ]]; then
+          TAG="${TAG:1}"
+        fi
+        echo "TAG: $TAG"
+        echo "TAG=$TAG" >> $GITHUB_ENV
+
+    - name: Checkout
+      uses: actions/checkout@v3
+
+    - name: Set up Linux
+      if: runner.os == 'Linux'
+      run: |
+        sudo apt update
+        sudo apt install -y mingw-w64 rpm alien nuget
+        sudo apt remove -y jq
+
+    - name: Set up macOS
+      if: runner.os == 'macOS'
+      run: |
+        brew install coreutils tree autoconf automake libtool
+        brew uninstall jq
+
+    # --- Build ---
+
+    - name: Build on macOS-12 (${{ env.AMD64_FREEBSD_GCC }})
+      if: matrix.os == 'macos-12'
+      env:
+        PREFIX: ${{ env.AMD64_FREEBSD_GCC }}
+        CC: gcc
+        MAKE: gmake
+        RUN_TESTS: false
+#      uses: vmactions/freebsd-vm@v0   # https://github.com/vmactions/freebsd-vm
+      uses: cross-platform-actions/[email protected]
+      with:
+#        mem: 2048
+#        release: 12.3
+#        envs: 'PREFIX CC MAKE RUN_TESTS ARTIFACT_DIR'
+#        usesh: true
+#        prepare: pkg install -y tree zip git autotools gmake lang/gcc
+#        run: |
+#          ./scripts/ci-build.sh
+
+### valid inputs are ['run', 'operating_system', 'architecture', 'version', 'shell', 'environment_variables', 'memory', 'cpu_count', 'hypervisor', 'image_url', 'sync_files']
+        memory: 2048
+        shell: sh
+        operating_system: freebsd
+        version: '13.2'
+        environment_variables: 'PREFIX CC MAKE RUN_TESTS ARTIFACT_DIR'
+        run: |
+          ./scripts/ci-freebsd-setup.sh
+          ./scripts/ci-build.sh
+
+    - name: Build on Linux (${{ env.AMD64_LINUX_GCC }})
+      if: runner.os == 'Linux'
+      env:
+        PREFIX: ${{ env.AMD64_LINUX_GCC }}
+        CC: gcc
+        MAKE: make
+        RUN_TESTS: true
+      shell: bash
+      run: |
+        ./scripts/ci-build.sh
+        ./scripts/ci-create-debian-package.sh
+        ./scripts/ci-create-rpm-package.sh
+
+    - name: Build on Linux (${{ env.AMD64_LINUX_CLANG }})
+      if: runner.os == 'Linux'
+      env:
+        PREFIX: ${{ env.AMD64_LINUX_CLANG }}
+        CC: clang
+        MAKE: make
+        RUN_TESTS: true
+      shell: bash
+      run: |
+        ./scripts/ci-build.sh
+        ./scripts/ci-create-debian-package.sh
+        ./scripts/ci-create-rpm-package.sh
+
+    - name: Build on Linux (${{ env.AMD64_WINDOWS_MINGW }})
+      if: runner.os == 'Linux'
+      env:
+        PREFIX: ${{ env.AMD64_WINDOWS_MINGW }}
+        CC: x86_64-w64-mingw32-gcc
+        MAKE: make
+        RUN_TESTS: false
+        CXX: x86_64-w64-mingw32-g++
+        CPP: x86_64-w64-mingw32-cpp
+        RANLIB: x86_64-w64-mingw32-ranlib
+        AR: x86_64-w64-mingw32-ar
+        NM: x86_64-w64-mingw32-nm
+        WINDRES: x86_64-w64-mingw32-windres
+
+      shell: bash
+      run: |
+        ./scripts/ci-build.sh
+        ./scripts/ci-create-nuget-package.sh
+
+    - name: Build on macOS (${{ env.AMD64_MACOSX_GCC }})
+      if: runner.os == 'macOS'
+      env:
+        PREFIX: ${{ env.AMD64_MACOSX_GCC }}
+        CC: gcc-11
+        MAKE: make
+        RUN_TESTS: false
+      shell: bash
+      run: |
+        ./scripts/ci-build.sh
+        ./$PREFIX/bin/zsv version
+
+    # --- Upload build artifacts ---
+
+    - name: Prepare build artifacts for upload
+      run: ./scripts/ci-prepare-artifacts-for-upload.sh
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.zip)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.zip
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.deb)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.deb
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.rpm)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.rpm
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.zip)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.zip
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.nupkg)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.nupkg
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_MACOSX_GCC }}.zip)
+      if: runner.os == 'macOS'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_MACOSX_GCC }}.zip
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.zip)
+      if: matrix.os == 'macos-12'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.zip
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.tar.gz)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.tar.gz
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.tar.gz)
+      if: runner.os == 'Linux'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.tar.gz
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_MACOSX_GCC }}.tar.gz)
+      if: runner.os == 'macOS'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_MACOSX_GCC }}.tar.gz
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.tar.gz)
+      if: matrix.os == 'macos-12'
+      uses: actions/upload-artifact@v3
+      env:
+        ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.tar.gz
+      with:
+        name: ${{ env.ARTIFACT_NAME }}
+        path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
+        retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+        if-no-files-found: error
+
+    # --- Upload release artifacts ---
+
+    - name: Upload release artifacts
+      uses: softprops/action-gh-release@v1
+      if: startsWith(github.ref, 'refs/tags/v')
+      with:
+        files: |
+          ${{ env.ARTIFACT_DIR }}/*.zip
+          ${{ env.ARTIFACT_DIR }}/*.tar.gz
+          ${{ env.ARTIFACT_DIR }}/*.deb
+          ${{ env.ARTIFACT_DIR }}/*.rpm
+          ${{ env.ARTIFACT_DIR }}/*.nupkg
+
+    # --- Update homebrew tap ---
+
+    - name: Update homebrew tap (liquidaty/homebrew-zsv)
+      if: ${{ startsWith(github.ref, 'refs/tags/v') && runner.os == 'macOS' }}
+      env:
+        HOMEBREW_TAP_DEPLOY_KEY: ${{ secrets.HOMEBREW_TAP_DEPLOY_KEY }}
+        TAG: ${{ env.TAG }}
+        TRIPLET: ${{ env.AMD64_MACOSX_GCC }}
+      run: |
+        ./scripts/ci-update-homebrew-tap.sh

+ 19 - 29
csv.mod/zsv/Makefile

@@ -4,6 +4,8 @@ THIS_MAKEFILE_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
 THIS_DIR:=$(shell basename "${THIS_MAKEFILE_DIR}")
 THIS_MAKEFILE:=$(lastword $(MAKEFILE_LIST))
 
+THIS_MAKE=`basename ${MAKE}`
+
 CONFIGFILE ?= config.mk
 include ${CONFIGFILE}
 
@@ -23,59 +25,47 @@ help:
 	@echo "build and run as described in docs/extension.md"
 	@echo
 	@echo "To build, test and install zsvlib and zsv:"
-	@echo "  ./configure && ${MAKE} test"
+	@echo "  ./configure && ${THIS_MAKE} test"
 	@echo
 	@echo "To build and install zsvlib and zsv:"
-	@echo "  ./configure && ${MAKE} install"
+	@echo "  ./configure && ${THIS_MAKE} install"
 	@echo
 	@echo "To build and install only zsvlib:"
-	@echo "  ./configure && ${MAKE} lib"
+	@echo "  ./configure && ${THIS_MAKE} -C src install"
 	@echo
 	@echo "To build and install only zsv (i.e. install both, remove zsvlib):"
-	@echo "  ./configure && ${MAKE} install && ${MAKE} uninstall-lib"
+	@echo "  ./configure && ${THIS_MAKE} install && ${THIS_MAKE} -C src uninstall"
 	@echo
 	@echo "To save and build from a configuration without losing the current one,"
 	@echo "use the configuration option CONFIGFILE e.g.:"
 	@echo "  ./configure --config-file=/path/to/config.custom"
-	@echo "  ./configure && ${MAKE} -C src CONFIGFILE=/path/to/config.custom install"
+	@echo "  ./configure && ${THIS_MAKE} -C src CONFIGFILE=/path/to/config.custom install"
 	@echo
 	@echo "To clean (remove temporary build objects) (after running configure):"
-	@echo "  ${MAKE} clean"
+	@echo "  ${THIS_MAKE} clean"
 	@echo
 	@echo "To uninstall libs and apps:"
-	@echo "  ${MAKE} uninstall"
+	@echo "  ${THIS_MAKE} uninstall"
+	@echo
+	@echo "To test:"
+	@echo "  ${THIS_MAKE} test"
 	@echo
 	@echo "Additional make options available for the library or the apps by"
-	@echo "  running ${MAKE} from the src or app directory"
+	@echo "  running ${THIS_MAKE} from the src or app directory"
 	@echo
 	@echo "For more information, see README.md"
 
-lib:
-	@${MAKE} -C src install CONFIGFILE=${CONFIGFILEPATH}
-
-test:
+check test:
 	@${MAKE} -C app test CONFIGFILE=${CONFIGFILEPATH}
 	@${MAKE} -C examples/lib test CONFIGFILE=${CONFIGFILEPATH}
 
-install:
-	@${MAKE} -C src install CONFIGFILE=${CONFIGFILEPATH}
-	@${MAKE} -C app install CONFIGFILE=${CONFIGFILEPATH}
-
-all:
-	@${MAKE} -C src install CONFIGFILE=${CONFIGFILEPATH}
-	@${MAKE} -C app all CONFIGFILE=${CONFIGFILEPATH}
+build install uninstall: % :
+	@${MAKE} -C src $* CONFIGFILE=${CONFIGFILEPATH}
+	@${MAKE} -C app $* CONFIGFILE=${CONFIGFILEPATH}
 
 clean:
-	@${MAKE} -C app clean-all CONFIGFILE=${CONFIGFILEPATH}
 	@${MAKE} -C src clean CONFIGFILE=${CONFIGFILEPATH}
+	@${MAKE} -C app clean-all CONFIGFILE=${CONFIGFILEPATH}
 	@rm -rf ${THIS_MAKEFILE_DIR}/build
 
-uninstall: uninstall-lib uninstall-app
-
-uninstall-app:
-	${MAKE} -C app uninstall CONFIGFILE=${CONFIGFILEPATH}
-
-uninstall-lib:
-	${MAKE} -C src uninstall CONFIGFILE=${CONFIGFILEPATH}
-
-.PHONY: help install uninstall uninstall-app uninstall-lib
+.PHONY: help build install uninstall uninstall clean check test

+ 5 - 3
csv.mod/zsv/README.md

@@ -22,7 +22,7 @@ It achieves high performance using SIMD operations,
 [efficient memory use](docs/memory.md) and other optimization techniques, and
 can also parse generic-delimited and fixed-width formats, as well as multi-row-span headers
 
-The ZSV CLI can be compiled to virtually any target, including [web assembly](examples/js), and offers features including `select`, `count`, direct CSV `sql`, `flatten`, `serialize`, `2json` conversion, `2db` sqlite3 conversion, `stack`, `pretty`, `2tsv`, `compare` and more.
+The ZSV CLI can be compiled to virtually any target, including [web assembly](examples/js), and offers features including `select`, `count`, direct CSV `sql`, `flatten`, `serialize`, `2json` conversion, `2db` sqlite3 conversion, `stack`, `pretty`, `2tsv`, `compare`, `paste` and more.
 
 Pre-built CLI packages are available via brew and nuget
 
@@ -65,7 +65,7 @@ that implements the expected
 
 ## Key highlights
 
-* Available as BOTH a library and an application
+* Available as BOTH a library and an application (coming soon: standalone zsvutil library for common helper functions such as csv writer)
 * Open-source, permissively licensed
 * Handles real-world CSV the same way that spreadsheet programs do (*including
   edge cases*). Gracefully handles (and can "clean") real-world data that may be
@@ -85,7 +85,7 @@ that implements the expected
 * Easy to use as a library in a few lines of code, via either pull or push parsing
 * Includes the `zsv` CLI with the following built-in commands:
   * `select`, `count`, `sql` query, `desc`ribe, `flatten`, `serialize`, `2json`,
-    `2db`, `stack`, `pretty`, `2tsv`, `compare`, `jq`, `prop`, `rm`
+    `2db`, `stack`, `pretty`, `2tsv`, `paste`, `compare`, `jq`, `prop`, `rm`
   * easily [convert between CSV/JSON/sqlite3](docs/csv_json_sqlite.md)
   * [compare multiple files](docs/compare.md)
 
@@ -237,6 +237,8 @@ for speed and ease of development for extending and/or customizing to your needs
 * `2json`: convert CSV to JSON. Optionally, output in [database schema](docs/db.schema.json)
 * `2tsv`: convert CSV to TSV
 * `compare`: compare two or more tables of data and output the differences
+* `paste` (alpha): horizontally paste two tables together (given inputs X and Y,
+   output rows 1...N, where row N contains all columns of X in row N followed by all columns of Y in row N)
 * `serialize` (inverse of flatten): convert an NxM table to a single 3x (Nx(M-1))
   table with columns: Row, Column Name, Column Value
 * `flatten` (inverse of serialize): flatten a table by combining rows that share

+ 22 - 6
csv.mod/zsv/app/2db.c

@@ -19,7 +19,7 @@
 #include <zsv/utils/mem.h>
 #include <zsv/utils/string.h>
 
-#include <yajl_helper.h>
+#include <yajl_helper/yajl_helper.h>
 
 #define ZSV_2DB_DEFAULT_TABLE_NAME "mytable"
 
@@ -72,9 +72,7 @@ struct zsv_2db_data {
   char *connection_string;
 
   struct {
-//    yajl_handle handle;
     struct yajl_helper_parse_state st;
-//    yajl_callbacks callbacks;
     yajl_status yajl_stat;
     enum zsv_2db_state state;
 
@@ -669,7 +667,7 @@ static yajl_handle zsv_2db_yajl_handle(zsv_2db_handle data) {
   return data->json_parser.st.yajl;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zsv_opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zsv_opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   (void)(zsv_opts);
   (void)(opts_used);
   FILE *f_in = NULL;
@@ -683,6 +681,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zs
      "",
      "Usage: " APPNAME " -o <output path> [-t <table name>] [input.json]\n",
      "",
+     "Alternatively, --output may be used in lieu of -o",
+     "",
      "Options:",
      "  -h,--help",
      "  --table <table_name> : save as specified table name",
@@ -691,8 +691,24 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zs
      // --sql to output sql statements
      // --append: append to existing db
      "",
-     "Miscellaneous:",
-     "  Alternatively, --output may be used in lieu of -o",
+     "Converts a json representation of a database table to an sqlite3 file.",
+     "",
+
+     // TO DO: add output examples and schema descriptions
+     "The input should conform to the schema defined at:",
+     "  https://github.com/liquidaty/zsv/blob/main/app/schema/database-table.json",
+     "",
+     "For example:",
+     "  [",
+     "    {",
+     "      \"columns\":[{\"name\":\"column 1\"}],",
+     "      \"indexes\":{\"ix1\":{\"on\":\"[column 1]\",\"unique\":true}}",
+     "    },",
+     "    [",
+     "      [\"row 1 cell 1\"],",
+     "      [\"row 2 cell 1\"]",
+     "    ]",
+     "  ]",
      NULL
     };
 

+ 2 - 2
csv.mod/zsv/app/2json.c

@@ -224,7 +224,7 @@ static int zsv_db2json(const char *input_filename, char **tname, jsonwriter_hand
   return err;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   struct zsv_2json_data data = { 0 };
   data.headers_next = &data.headers;
 
@@ -357,7 +357,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       } else {
         opts->row_handler = zsv_2json_row;
         opts->ctx = &data;
-        if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) == zsv_status_ok) {
+        if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) == zsv_status_ok) {
           zsv_handle_ctrl_c_signal();
           while(!data.err
                 && !zsv_signal_interrupted

+ 2 - 2
csv.mod/zsv/app/2tsv.c

@@ -151,7 +151,7 @@ int zsv_2tsv_usage(int rc) {
   return rc;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   struct zsv_2tsv_data data = { 0 };
   const char *input_path = NULL;
   int err = 0;
@@ -194,7 +194,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
   opts->row_handler = zsv_2tsv_row;
   opts->ctx = &data;
-  if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) == zsv_status_ok) {
+  if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) == zsv_status_ok) {
     char output[ZSV_2TSV_BUFF_SIZE];
     data.out.buff = output;
 

+ 42 - 15
csv.mod/zsv/app/Makefile

@@ -134,12 +134,9 @@ CFLAGS+= -I${PREFIX}/include
 THIS_LIB_BASE=$(shell cd .. && pwd)
 INCLUDE_DIR=${THIS_LIB_BASE}/include
 BUILD_DIR=${THIS_LIB_BASE}/build/${BUILD_SUBDIR}/${CCBN}
-UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq
+UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq os
 
 ZSV_EXTRAS ?=
-ifneq ($(WIN),0)
- UTILS1+= os
-endif
 
 ifneq ($(findstring emcc,$(CC)),) # emcc
   ZSV_EXTRAS=1
@@ -170,8 +167,8 @@ endif
 
 ZSV=$(BINDIR)/zsv${EXE}
 
-SOURCES= echo count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db compare prop rm mv jq
-CLI_SOURCES=echo select desc count 2tsv pretty sql flatten 2json serialize stack 2db compare prop rm mv jq
+SOURCES= echo paste count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db compare prop rm mv jq
+CLI_SOURCES=echo select desc count paste 2tsv pretty sql flatten 2json serialize stack 2db compare prop rm mv jq
 
 CFLAGS+= -DUSE_JQ
 
@@ -208,13 +205,15 @@ INIH_INCLUDE=${THIS_MAKEFILE_DIR}/external/inih
 INIH_OBJECT=${BUILD_DIR}-external/inih/inih.o
 CLI_INCLUDE+= -I${INIH_INCLUDE}
 
-YAJL_SRC=${THIS_MAKEFILE_DIR}/external/yajl/yajl.c
+YAJL_SRC_DIR=${THIS_MAKEFILE_DIR}/external/yajl
 YAJL_OBJ1=yajl yajl_alloc yajl_buf yajl_encode yajl_gen yajl_lex yajl_parser yajl_tree yajl_version
 YAJL_OBJ=$(addprefix ${BUILD_DIR}-external/yajl/,$(addsuffix .o,${YAJL_OBJ1}))
-YAJL_INCLUDE=-I${THIS_MAKEFILE_DIR}/external/yajl/build/yajl-2.1.1/include
+YAJL_INCLUDE=-I${YAJL_SRC_DIR}/build/yajl-2.1.1/include
 
 YAJL_HELPER_OBJ=${BUILD_DIR}-external/yajl_helper/yajl_helper.o
-YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/external/yajl_helper
+# YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/external/yajl_helper
+YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/external
+CFLAGS+=${YAJL_HELPER_INCLUDE} ${YAJL_INCLUDE}
 
 # jq
 JQ_TARBALL=${THIS_MAKEFILE_DIR}/external/jq-1.6.tar.bz2
@@ -262,15 +261,36 @@ help:
 	@echo "which will build and test all apps, or to build/test a single app:"
 	@echo "  ${MAKE} test-xx"
 	@echo "where xx is any of:"
-	@echo "  echo count count-pull select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db prop rm mv"
+	@echo "  echo count count-pull paste select select-pull 2tsv 2json serialize flatten pretty stack desc sql 2db prop rm mv"
 	@echo ""
 
 install: ${ZSV}
 
+uninstall-all: uninstall uninstall-util-lib
+
 uninstall:
 	rm -rf ${ZSV}
 
-build: all
+ZSV_UTIL_A=${LIBDIR}/libzsvutil.a
+${ZSV_UTIL_A}: ${BUILD_DIR}/objs/utils/util.a
+	@mkdir -p `dirname $@`
+	cp -p $< $@
+
+UTIL_A_OBJ:=writer file dirs-no-jq os
+UTIL_A_OBJ:=$(addprefix ${BUILD_DIR}/objs/utils/,$(addsuffix .o,${UTIL_A_OBJ}))
+
+${BUILD_DIR}/objs/utils/util.a: ${UTIL_A_OBJ}
+	$(AR) rcv $@ $^ # $?
+	$(RANLIB) $@
+	$(AR) -t $@ # check it is there
+	@echo Built $@
+
+uninstall-util-lib:
+	@rm -f ${ZSV_UTIL_A}
+
+install-util-lib: ${ZSV_UTIL_A}
+
+build: ${CLI}
 
 all: ${TARGETS}
 
@@ -300,7 +320,7 @@ ${CLEANS}: clean-%:
 	rm -f ${STANDALONE_PFX}$*${EXE}
 	rm -f ${BUILD_DIR}/*/*$*.o
 
-.PHONY: all install cli build build-% clean clean-% test test-% lib-%
+.PHONY: all install cli build build-% clean clean-% test test-% lib-% check install-util-lib uninstall-util-lib clean-util-lib
 
 .SECONDARY: ${OBJECTS}
 
@@ -312,6 +332,10 @@ ${BUILD_DIR}/objs/utils/%.o : utils/%.c ${INCLUDE_DIR}/zsv/utils/%.h ${JQ_LIB}
 	@mkdir -p `dirname "$@"`
 	${CC} ${CFLAGS} -I${INCLUDE_DIR} -I${UTF8PROC_INCLUDE} -DINCLUDE_SRC -o $@ -c utils/$*.c ${MORE_SOURCE}
 
+${BUILD_DIR}/objs/utils/dirs-no-jq.o : utils/dirs.c ${INCLUDE_DIR}/zsv/utils/dirs.h
+	@mkdir -p `dirname "$@"`
+	${CC} ${CFLAGS} -I${INCLUDE_DIR} -I${UTF8PROC_INCLUDE} -DINCLUDE_SRC -o $@ -c utils/dirs-no-jq.c ${MORE_SOURCE}
+
 ${BUILD_DIR}/objs/zsv_%.o: %.c
 	@mkdir -p `dirname "$@"`
 	${CC} ${CFLAGS} -I${INCLUDE_DIR} -I${UTF8PROC_INCLUDE} -c $< -o $@
@@ -409,9 +433,9 @@ ${YAJL_OBJ}: ${BUILD_DIR}-external/yajl/%.o : external/yajl/src/%.c
 
 ${YAJL_HELPER_OBJ}: external/yajl_helper/yajl_helper.c
 	@mkdir -p `dirname "$@"`
-	${CC} ${CFLAGS} -I${BASEDIR}/yajl_helper ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -c $< -o $@
+	${CC} ${CFLAGS} ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -c $< -o $@
 
-test:	test-standalone test-cli
+check test:	test-standalone test-cli
 
 test-standalone:
 	@${MAKE} -C test test QUIET=1 LEAKS=${LEAKS} CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG}
@@ -419,11 +443,14 @@ test-standalone:
 test-cli:  ${CLI}
 	@${MAKE} -C test $@ QUIET=1 LEAKS=${LEAKS} CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} CLI=${CLI}
 
-clean-all: clean clean-external clean-obj clean-lib
+clean-all: clean clean-external clean-obj clean-lib clean-util-lib
 
 clean-external:
 	@rm -rf ${JQ_SRC}
 
+clean-util-lib:
+	@rm -f ${BUILD_DIR}/objs/utils/util.a
+
 clean-lib:
 	@rm -rf ${BUILD_DIR}-external
 

+ 11 - 4
csv.mod/zsv/app/cli.c

@@ -11,6 +11,7 @@
 #include <string.h>
 #include <zsv/utils/arg.h>
 #include <zsv/utils/dl.h>
+#include <zsv/utils/prop.h>
 #include <zsv/utils/string.h>
 #include <zsv/utils/dirs.h>
 #include <zsv/utils/signal.h>
@@ -32,7 +33,7 @@ static struct zsv_ext *zsv_ext_new(const char *dl_name, const char *id, char ver
 #include "cli_ini.c"
 
 typedef int (cmd_main)(int argc, const char *argv[]);
-typedef int (zsv_cmd)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used);
+typedef int (zsv_cmd)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used);
 typedef int (*cmd_reserved)();
 
 struct builtin_cmd {
@@ -56,6 +57,7 @@ CLI_BUILTIN_DECL_STATIC(unregister);
 
 ZSV_MAIN_DECL(select);
 ZSV_MAIN_DECL(count);
+ZSV_MAIN_DECL(paste);
 ZSV_MAIN_DECL(2json);
 ZSV_MAIN_DECL(2tsv);
 ZSV_MAIN_DECL(serialize);
@@ -88,6 +90,7 @@ struct builtin_cmd builtin_cmds[] = {
 
   CLI_BUILTIN_COMMAND(select),
   CLI_BUILTIN_COMMAND(count),
+  CLI_BUILTIN_COMMAND(paste),
   CLI_BUILTIN_COMMAND(2json),
   CLI_BUILTIN_COMMAND(2tsv),
   CLI_BUILTIN_COMMAND(serialize),
@@ -290,9 +293,12 @@ static enum zsv_ext_status ext_add_command(zsv_execution_context ctx,
 static enum zsv_ext_status ext_parse_all(zsv_execution_context ctx,
                                          void *user_context,
                                          void (*row_handler)(void *ctx),
-                                         struct zsv_opts *const custom
+                                         struct zsv_opts *const custom,
+                                         struct zsv_prop_handler *custom_prop
                                          ) {
   struct zsv_opts opts = custom ? *custom : ext_parser_opts(ctx);
+  struct zsv_prop_handler custom_prop_handler = custom_prop ? *custom_prop : zsv_get_default_custom_prop_handler();
+
   if(row_handler)
     opts.row_handler = row_handler;
   zsv_parser parser = zsv_new(&opts);
@@ -401,6 +407,7 @@ static enum zsv_ext_status run_extension(int argc, const char *argv[], struct zs
       struct zsv_opts opts;
       zsv_args_to_opts(argc, argv, &argc, argv, &opts, NULL);
       zsv_set_default_opts(opts);
+      // need a corresponding zsv_set_default_custom_prop_handler?
       stat = cmd->main(&ctx, ctx.argc - 1, &ctx.argv[1]);
     }
 
@@ -481,7 +488,7 @@ int ZSV_CLI_MAIN(int argc, const char *argv[]) {
         else if(help_builtin->cmd) {
           char opts_used[ZSV_OPTS_SIZE_MAX] = { 0 };
           struct zsv_opts opts = { 0 };
-          return help_builtin->cmd(2, argv_tmp, &opts, opts_used);
+          return help_builtin->cmd(2, argv_tmp, &opts, NULL, opts_used);
         } else
           return fprintf(stderr, "Unexpected syntax!\n");
       } else {
@@ -507,7 +514,7 @@ int ZSV_CLI_MAIN(int argc, const char *argv[]) {
       struct zsv_opts opts;
       enum zsv_status stat = zsv_args_to_opts(argc, argv, &argc, argv, &opts, opts_used);
       if(stat == zsv_status_ok)
-        return builtin->cmd(argc - 1, argc > 1 ? &argv[1] : NULL, &opts, opts_used);
+        return builtin->cmd(argc - 1, argc > 1 ? &argv[1] : NULL, &opts, NULL, opts_used);
       return stat;
     }
   }

+ 86 - 26
csv.mod/zsv/app/compare.c

@@ -138,7 +138,8 @@ static void zsv_compare_json_row_end(struct zsv_compare_data *data) {
 static void zsv_compare_output_tuple(struct zsv_compare_data *data,
                                      struct zsv_compare_input *key_input,
                                      const unsigned char *colname,
-                                     struct zsv_cell *values // in original input order
+                                     struct zsv_cell *values, // in original input order
+                                     char is_key
                                      ) {
   // print ID | Column | Value 1 | ... | Value N
   if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON)
@@ -159,7 +160,7 @@ static void zsv_compare_output_tuple(struct zsv_compare_data *data,
 
   for(unsigned i = 0; i < data->input_count; i++) {
     struct zsv_compare_input *input = &data->inputs[i];
-    if(input->done || !input->row_loaded) { // no data for this input
+    if((input->done || !input->row_loaded) && !is_key) { // no data for this input
       zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0);
     } else {
       struct zsv_cell *value = &values[i];
@@ -182,6 +183,34 @@ static void zsv_compare_output_tuple(struct zsv_compare_data *data,
     zsv_compare_json_row_end(data);
 }
 
+// Return the key column names joined with a pipe character, built on first use and cached in
+// data->combined_key_names (released in zsv_compare_data_free). Returns NULL if allocation fails.
+static const unsigned char *zsv_compare_combined_key_names(struct zsv_compare_data *data) {
+  if(!data->combined_key_names) {
+    size_t len = 2;
+    // keys are chained through ->next (compare_sort.c walks them that way), so follow the chain
+    for(struct zsv_compare_key *key = data->keys; key; key = key->next) {
+      if(key->name)
+        len += strlen(key->name) + 1;
+    }
+    if((data->combined_key_names = calloc(1, len))) {
+      unsigned char *start = NULL;
+      for(struct zsv_compare_key *key = data->keys; key; key = key->next) {
+        if(key->name) {
+          if(start) {
+            *start = (unsigned char)'|';
+            start++;
+          } else
+            start = data->combined_key_names;
+          strcpy((char *)start, key->name);
+          start += strlen((char *)start);
+        }
+      }
+    }
+  }
+  return data->combined_key_names;
+}
+
 static void zsv_compare_print_row(struct zsv_compare_data *data,
                                   const unsigned last_ix   // last input ix in inputs_to_sort
                                   ) {
@@ -198,19 +227,25 @@ static void zsv_compare_print_row(struct zsv_compare_data *data,
 
 #define ZSV_COMPARE_MISSING "Missing"
 
-  if(last_ix + 1 < data->input_count) {
+//  if(last_ix + 1 < data->input_count) {
     // if we don't have data from every input, then output "Missing" for missing inputs
-    for(unsigned i = last_ix + 1; i < data->input_count; i++) {
+    char got_missing = 0;
+    for(unsigned i = 0; i < data->input_count; i++) {
       struct zsv_compare_input *input = data->inputs_to_sort[i];
-      unsigned input_ix = input->index;
-      values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
-      values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
+      if(i > last_ix) {
+        got_missing = 1;
+        unsigned input_ix = input->index;
+        values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING;
+        values[input_ix].len = strlen(ZSV_COMPARE_MISSING);
+      }
     }
-    zsv_compare_output_tuple(data, key_input, (unsigned char *)"<key>", values);
-
-    // reset values
-    memset(values, 0, data->input_count * sizeof(*values));
-  }
+    if(got_missing) {
+      const unsigned char *key_names = data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)"<key>";
+      zsv_compare_output_tuple(data, key_input, key_names, values, 1);
+      // reset values
+      memset(values, 0, data->input_count * sizeof(*values));
+    }
+//  }
 
   // for each output column
   zsv_compare_unique_colname *output_col = data->output_colnames_first;
@@ -237,13 +272,13 @@ static void zsv_compare_print_row(struct zsv_compare_data *data,
         if(!output_col)
           output_col = input->output_colnames[input_col_ix];
         values[input_ix] = data->get_cell(input, input_col_ix);
-        if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix]))
+        if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix))
           different = 1;
       }
     }
 
     if(different)
-      zsv_compare_output_tuple(data, key_input, output_col->name, values);
+      zsv_compare_output_tuple(data, key_input, output_col->name, values, 0);
   }
   free(values);
 }
@@ -283,7 +318,8 @@ static enum zsv_compare_status zsv_compare_set_inputs(struct zsv_compare_data *d
   return zsv_compare_status_ok;
 }
 
-static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2);
+static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2,
+                            void *data, unsigned col_ix);
 
 static void zsv_compare_output_begin(struct zsv_compare_data *data) {
   if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) {
@@ -334,8 +370,7 @@ static void zsv_compare_output_begin(struct zsv_compare_data *data) {
 
     // write additional column names
     for(struct zsv_compare_added_column *ac = data->added_columns; ac; ac = ac->next)
-      zsv_compare_header_str(data, ac->colname->name, // ac->colname->name_len,
-                             ZSV_WRITER_SAME_ROW, 1);
+      zsv_compare_header_str(data, ac->colname->name, ZSV_WRITER_SAME_ROW, 1);
 
     if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON && !data->writer.object)
       jsonwriter_end_array(data->writer.handle.jsw);
@@ -370,6 +405,7 @@ static enum zsv_compare_status
 input_init_unsorted(struct zsv_compare_data *data,
                     struct zsv_compare_input *input,
                     struct zsv_opts *opts,
+                    struct zsv_prop_handler *custom_prop_handler,
                     const char *opts_used) {
   (void)(opts_used);
   if(!(input->stream = fopen(input->path, "rb"))) {
@@ -378,7 +414,7 @@ input_init_unsorted(struct zsv_compare_data *data,
   }
   struct zsv_opts these_opts = *opts;
   these_opts.stream = input->stream;
-  enum zsv_status stat = zsv_new_with_properties(&these_opts, input->path, NULL, &input->parser);
+  enum zsv_status stat = zsv_new_with_properties(&these_opts, custom_prop_handler, input->path, NULL, &input->parser);
   if(stat != zsv_status_ok)
     return zsv_compare_status_error;
 
@@ -390,7 +426,11 @@ input_init_unsorted(struct zsv_compare_data *data,
 
 zsv_compare_handle zsv_compare_new() {
   zsv_compare_handle z = calloc(1, sizeof(*z));
-  zsv_compare_set_comparison(z, zsv_compare_cell, z);
+#if defined(ZSV_COMPARE_CMP_FUNC) && defined(ZSV_COMPARE_CMP_CTX)
+  zsv_compare_set_comparison(z, ZSV_COMPARE_CMP_FUNC, ZSV_COMPARE_CMP_CTX);
+#else
+  zsv_compare_set_comparison(z, zsv_compare_cell, NULL);
+#endif
   z->output_colnames_next = &z->output_colnames;
 
   z->next_row = zsv_compare_next_unsorted_row;
@@ -431,6 +471,7 @@ static void zsv_compare_data_free(struct zsv_compare_data *data) {
   for(unsigned i = 0; i < data->input_count; i++)
     zsv_compare_input_free(&data->inputs[i]);
   free(data->inputs);
+  free(data->combined_key_names);
   free(data->inputs_to_sort);
   for(unsigned i = 0; i < data->writer.properties.used; i++)
     free(data->writer.properties.names[i]);
@@ -466,8 +507,11 @@ void zsv_compare_set_comparison(struct zsv_compare_data *data,
   data->cmp_ctx = cmp_ctx;
 }
 
-static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2) {
+static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2,
+                            void *data, unsigned col_ix) { // comparison installed by zsv_compare_new when no override macros are defined
   (void)(ctx);
+  (void)(data); // unused here; present to match the zsv_compare_cell_func signature
+  (void)(col_ix); // unused here; the result depends only on the two cells
   return zsv_strincmp(c1.str, c1.len,
                       c2.str, c2.len);
 }
@@ -478,7 +522,11 @@ static enum zsv_compare_status zsv_compare_advance(struct zsv_compare_data *data
   for(unsigned i = 0; i < data->input_count; i++) {
     struct zsv_compare_input *input = &data->inputs[i];
     if(input->done) continue;
-    if(input->row_loaded) continue;
+
+    if(input->row_loaded) {
+      got = 1;
+      continue;
+    }
     if(data->next_row(input) != zsv_status_row)
       input->done = 1;
     else {
@@ -563,6 +611,8 @@ static int compare_usage() {
     "  --json           : output as JSON",
     "  --json-compact   : output as compact JSON",
     "  --json-object    : output as an array of objects",
+    "  --print-key-colname : when outputting key column diffs,",
+    "                        print column name instead of <key>",
     "",
     "NOTES",
     "",
@@ -591,9 +641,8 @@ static int compare_usage() {
   return 0;
 }
 
-// TO DO: consolidate common code w sql.c-- move to utils/db.c?
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
-                               const char *opts_used) {
+// TO DO: consolidate w sql.c, move common code to utils/db.c
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   /**
    * See sql.c re passing options to sqlite3 when sorting is used
    */
@@ -656,11 +705,14 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
     } else if(!strcmp(arg, "--json-compact")) {
       data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON;
       data->writer.compact = 1;
+    } else if(!strcmp(arg, "--print-key-colname")) {
+      data->print_key_col_names = 1;
     } else
       input_filenames[input_count++] = arg;
   }
 
   struct zsv_opts original_default_opts;
+  struct zsv_prop_handler original_default_custom_prop_handler;
   if(data->sort) {
     if(!data->key_count) {
       fprintf(stderr, "Error: --sort requires one or more keys\n");
@@ -669,6 +721,11 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       original_default_opts = zsv_get_default_opts();
       zsv_set_default_opts(*opts);
 
+      if(custom_prop_handler) {
+        original_default_custom_prop_handler = zsv_get_default_custom_prop_handler();
+        zsv_set_default_custom_prop_handler(*custom_prop_handler);
+      }
+
       if(data->status == zsv_compare_status_ok)
         data->status = zsv_compare_init_sorted(data);
     }
@@ -684,7 +741,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       for(unsigned ix = 0; data->status == zsv_compare_status_ok && ix < input_count; ix++) {
         struct zsv_compare_input *input = &data->inputs[ix];
         input->path = input_filenames[ix];
-        data->status = data->input_init(data, input, opts, opts_used);
+        data->status = data->input_init(data, input, opts, custom_prop_handler, opts_used);
       }
     }
 
@@ -840,8 +897,11 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
   err = data->status == zsv_compare_status_ok ? 0 : 1;
 
-  if(data->sort)
+  if(data->sort) {
     zsv_set_default_opts(original_default_opts); // restore default options
+    if(custom_prop_handler)
+      zsv_set_default_custom_prop_handler(original_default_custom_prop_handler);
+  }
 
   zsv_compare_delete(data);
   return err;

+ 3 - 1
csv.mod/zsv/app/compare.h

@@ -12,7 +12,9 @@ enum zsv_compare_status {
 
 typedef struct zsv_compare_data *zsv_compare_handle;
 
-typedef int (*zsv_compare_cell_func)(void *ctx, struct zsv_cell, struct zsv_cell);
+typedef int (*zsv_compare_cell_func)(void *ctx, struct zsv_cell, struct zsv_cell,
+                                     void *struct_zsv_compare_data,
+                                     unsigned input_col_ix);
 
 zsv_compare_handle zsv_compare_new();
 // enum zsv_compare_status zsv_compare_set_inputs(zsv_compare_handle, unsigned input_count, unsigned key_count);

+ 7 - 11
csv.mod/zsv/app/compare_internal.h

@@ -43,7 +43,7 @@ struct zsv_compare_input {
 
   unsigned col_count;
   unsigned *out2in; // out2in[output column ix] = input column ix + 1 (zero for no match)
-  zsv_compare_unique_colname **output_colnames; // colname_ptrs;
+  zsv_compare_unique_colname **output_colnames;
 
   unsigned key_count;
   struct zsv_compare_input_key *keys;
@@ -51,8 +51,9 @@ struct zsv_compare_input {
   sqlite3_stmt *sort_stmt;
 
   unsigned char row_loaded:1;
+  unsigned char missing:1;
   unsigned char done:1;
-  unsigned char _:6;
+  unsigned char _:5;
 };
 
 struct zsv_compare_key {
@@ -69,13 +70,6 @@ struct zsv_compare_added_column {
   unsigned col_ix; // index of column in input from which to extract this value
 };
 
-/*
-struct zsv_compare_sort {
-  struct zsv_compare_sort *next;
-  const char *by;
-};
-*/
-
 struct zsv_compare_data {
   enum zsv_compare_status status;
   unsigned input_count; // number of allocated compare_input structs
@@ -84,6 +78,7 @@ struct zsv_compare_data {
 
   unsigned key_count;
   struct zsv_compare_key *keys;
+  unsigned char *combined_key_names;
 
   size_t row_count; // only matters if no ID columns are specified
 
@@ -106,9 +101,9 @@ struct zsv_compare_data {
   enum zsv_compare_status (*input_init)(struct zsv_compare_data *data,
                                         struct zsv_compare_input *input,
                                         struct zsv_opts *opts,
+                                        struct zsv_prop_handler *custom_prop_handler,
                                         const char *opts_used);
 
-//  struct zsv_compare_sort *sort;
   sqlite3 *sort_db; // used when --sort option was specified
 
   struct {
@@ -132,7 +127,8 @@ struct zsv_compare_data {
 
   unsigned char sort:1;
   unsigned char sort_in_memory:1;
-  unsigned char _:6;
+  unsigned char print_key_col_names:1;
+  unsigned char _:5;
 };
 
 #endif

+ 5 - 29
csv.mod/zsv/app/compare_sort.c

@@ -26,31 +26,6 @@ static int zsv_compare_sort_prep_table(struct zsv_compare_data *data,
   return rc;
 }
 
-/*
-static
-struct zsv_compare_sort **zsv_compare_sort_add(struct zsv_compare_sort **next,
-                                               const char *by,
-                                               enum zsv_compare_status *stat) {
-  struct zsv_compare_sort *e = calloc(1, sizeof(*e));
-  if(!e)
-    *stat = zsv_compare_status_memory;
-  else {
-    e->by = by;
-    *next = e;
-    next = &e->next;
-  }
-  return next;
-}
-
-static void zsv_compare_sort_delete(struct zsv_compare_sort *sort) {
-  for(struct zsv_compare_sort *next; sort; sort = next) {
-    next = sort->next;
-    free(sort);
-  }
-}
-
-*/
-
 static int zsv_compare_sort_stmt_prep(sqlite3 *db, sqlite3_stmt **stmtp,
                                       // struct zsv_compare_sort *sort,
                                       struct zsv_compare_key *keys,
@@ -62,7 +37,6 @@ static int zsv_compare_sort_stmt_prep(sqlite3 *db, sqlite3_stmt **stmtp,
   }
 
   sqlite3_str_appendf(select_clause, "select * from data%i order by ", ix);
-//  for(struct zsv_compare_sort *tmp = sort; tmp; tmp = tmp->next)
   for(struct zsv_compare_key *key = keys; key; key = key->next)
     sqlite3_str_appendf(select_clause, "%s\"%w\"", key == keys ? "" : ", ", key->name);
 
@@ -76,10 +50,12 @@ static int zsv_compare_sort_stmt_prep(sqlite3 *db, sqlite3_stmt **stmtp,
 static enum zsv_compare_status
 input_init_sorted(struct zsv_compare_data *data,
                   struct zsv_compare_input *input,
-                  struct zsv_opts *opts,
+                  struct zsv_opts *_opts,
+                  struct zsv_prop_handler *_prop_handler,
                   const char *opts_used
                   ) {
-  (void)(opts);
+  (void)(_opts);
+  (void)(_prop_handler);
   char *err_msg = NULL;
   int rc = zsv_compare_sort_prep_table(data, input->path, opts_used, 0, &err_msg, input->index);
   if(err_msg) {
@@ -88,7 +64,7 @@ input_init_sorted(struct zsv_compare_data *data,
   }
   if(rc == SQLITE_OK)
     rc = zsv_compare_sort_stmt_prep(data->sort_db, &input->sort_stmt,
-                                    data->keys, // data->sort,
+                                    data->keys,
                                     input->index);
   return rc == SQLITE_OK ? zsv_compare_status_ok : zsv_compare_status_error;
 }

+ 2 - 2
csv.mod/zsv/app/count-pull.c

@@ -23,7 +23,7 @@ static int count_usage() {
   return 0;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   const char *input_path = NULL;
   int err = 0;
   for(int i = 1; !err && i < argc; i++) {
@@ -61,7 +61,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   if(!err) {
     zsv_parser parser;
 //    if(zsv_pull_new_with_properties(opts, input_path, opts_used, &parser) != zsv_status_ok) {
-    if(zsv_new_with_properties(opts, input_path, opts_used, &parser) != zsv_status_ok) {
+    if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &parser) != zsv_status_ok) {
       fprintf(stderr, "Unable to initialize parser\n");
       err = 1;
     } else {

+ 2 - 2
csv.mod/zsv/app/count.c

@@ -32,7 +32,7 @@ static int count_usage() {
   return 0;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   struct data data = { 0 };
   const char *input_path = NULL;
   int err = 0;
@@ -71,7 +71,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   if(!err) {
     opts->row_handler = row;
     opts->ctx = &data;
-    if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) != zsv_status_ok) {
+    if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) != zsv_status_ok) {
       fprintf(stderr, "Unable to initialize parser\n");
       err = 1;
     } else {

+ 4 - 3
csv.mod/zsv/app/desc.c

@@ -493,6 +493,7 @@ static void zsv_desc_cleanup(struct zsv_desc_data *data) {
 #define ZSV_DESC_TMPFN_TEMPLATE "zsv_desc_XXXXXXXXXXXX"
 
 static void zsv_desc_execute(struct zsv_desc_data *data,
+                             struct zsv_prop_handler *custom_prop_handler,
                              const char *input_path,
                              const char *opts_used) {
   data->opts->cell_handler = zsv_desc_cell;
@@ -501,7 +502,7 @@ static void zsv_desc_execute(struct zsv_desc_data *data,
 
   if(!data->max_enum)
     data->max_enum = ZSV_DESC_MAX_ENUM_DEFAULT;
-  if(zsv_new_with_properties(data->opts, input_path, opts_used, &data->parser)
+  if(zsv_new_with_properties(data->opts, custom_prop_handler, input_path, opts_used, &data->parser)
      == zsv_status_ok) {
     FILE *input_temp_file = NULL;
     enum zsv_status status;
@@ -517,7 +518,7 @@ static void zsv_desc_execute(struct zsv_desc_data *data,
   }
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc < 1)
     zsv_desc_usage();
   else if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
@@ -593,7 +594,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       return 1;
     }
 
-    zsv_desc_execute(&data, input_path, opts_used);
+    zsv_desc_execute(&data, custom_prop_handler, input_path, opts_used);
     zsv_desc_finalize(&data);
     zsv_desc_print(&data);
     zsv_desc_cleanup(&data);

+ 29 - 5
csv.mod/zsv/app/echo.c

@@ -114,6 +114,8 @@ const char *zsv_echo_usage_msg[] = {
   "  --overwrite <source>: overwrite cells using given source. Source may be:",
   "                        - sqlite3://<filename>[?sql=<query>]",
   "                          ex: sqlite3://overwrites.db?sql=select row, column, value from overwrites order by row, column",
+  "                        - /path/to/file.csv",
+  "                          path to CSV file with columns row,col,val (in that order) and rows pre-sorted by row and column",
   NULL
 };
 
@@ -134,8 +136,9 @@ static void zsv_echo_cleanup(struct zsv_echo_data *data) {
     sqlite3_close(data->o.sqlite3.db);
 }
 
-static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const char *source, size_t len) {
 #define zsv_echo_sqlite3_prefix "sqlite3://"
+
+static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const char *source, size_t len) {
   size_t pfx_len;
   if(len > (pfx_len = strlen(zsv_echo_sqlite3_prefix)) && !memcmp(source, zsv_echo_sqlite3_prefix, pfx_len)) {
     data->o.sqlite3.filename = zsv_memdup(source + pfx_len, len - pfx_len);
@@ -149,7 +152,9 @@ static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const cha
         data->o.sqlite3.sql = sql + strlen(zsv_echo_sql_prefix);
     }
     // open the sql connection
-    if(!(data->o.sqlite3.filename && *data->o.sqlite3.filename)) {
+    if(!(data->o.sqlite3.filename && *data->o.sqlite3.filename
+         && data->o.sqlite3.sql && *data->o.sqlite3.sql)) {
+      free(data->o.sqlite3.filename);
       fprintf(stderr, "Invalid query string");
       return 1;
     }
@@ -182,7 +187,7 @@ static int zsv_echo_parse_overwrite_source(struct zsv_echo_data *data, const cha
   return 1;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc < 1 || (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))) {
     zsv_echo_usage();
     return 0;
@@ -194,6 +199,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
   int err = 0;
 
+  const char *overwrites_csv = NULL;
+
   data.overwrite.eof = 1;
   for(int arg_i = 1; !err && arg_i < argc; arg_i++) {
     const char *arg = argv[arg_i];
@@ -205,7 +212,12 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
         err = 1;
       } else {
         const char *src = argv[++arg_i];
-        err = zsv_echo_parse_overwrite_source(&data, src, strlen(src));
+        if(strlen(src) > strlen(zsv_echo_sqlite3_prefix) &&
+           !memcmp(zsv_echo_sqlite3_prefix, src, strlen(zsv_echo_sqlite3_prefix)))
+          err = zsv_echo_parse_overwrite_source(&data, src, strlen(src));
+        else {
+          overwrites_csv = src;
+        }
       }
     } else if(!data.in) {
 #ifndef NO_STDIN
@@ -243,7 +255,19 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
   opts->stream = data.in;
   opts->ctx = &data;
   data.csv_writer = zsv_writer_new(&writer_opts);
-  if(zsv_new_with_properties(opts, data.input_path, opts_used, &data.parser) != zsv_status_ok
+
+  if(overwrites_csv) { // overwrite source was given as a plain file path rather than a sqlite3 URL
+    if(!(opts->overwrite.ctx = fopen(overwrites_csv, "rb"))) {
+      fprintf(stderr, "Unable to open for read: %s\n", overwrites_csv);
+      zsv_echo_cleanup(&data);
+      return 1;
+    } else {
+      opts->overwrite.type = zsv_overwrite_type_csv;
+      opts->overwrite.close_ctx = (int (*)(void *))fclose; // fclose is registered as the close callback for ctx
+    }
+  }
+
+  if(zsv_new_with_properties(opts, custom_prop_handler, data.input_path, opts_used, &data.parser) != zsv_status_ok
      || !data.csv_writer) {
     zsv_echo_cleanup(&data);
     return 1;

+ 2 - 2
csv.mod/zsv/app/flatten.c

@@ -631,7 +631,7 @@ static void flatten_cleanup(struct flatten_data *data) {
   zsv_writer_delete(data->csv_writer);
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
     flatten_usage();
     return 0;
@@ -746,7 +746,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       opts->ctx = &data;
 
       zsv_parser handle;
-      if(zsv_new_with_properties(opts, input_path, opts_used, &handle) != zsv_status_ok)
+      if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &handle) != zsv_status_ok)
         err = data.cancelled = zsv_printerr(1, "Unable to create csv parser");
       else {
         zsv_set_scan_filter(handle, zsv_filter_write, tmp_f);

+ 172 - 0
csv.mod/zsv/app/paste.c

@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2021 Liquidaty and the zsv/lib contributors
+ * All rights reserved
+ *
+ * This file is part of zsv/lib, distributed under the license defined at
+ * https://opensource.org/licenses/MIT
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <zsv/utils/writer.h>
+
+#define ZSV_COMMAND paste
+#include "zsv_command.h"
+
+static int zsv_paste_usage() { // print the usage text for the paste command to stdout
+  static const char *usage =
+    "Usage: paste <filename> [<filename> ...]\n"
+    "\n"
+    "Options:\n"
+    " -h, --help            : show usage\n";
+  printf("%s\n", usage); // the format adds one more newline after the text
+  return 1; // always 1; the only caller in this file ignores the value
+}
+
+struct zsv_paste_input_file { // one input file; inputs form a singly-linked list in command-line order
+  struct zsv_paste_input_file *next; // next input, or NULL at the end of the list
+  const char *fname; // path exactly as passed on the command line (not copied)
+  struct zsv_opts opts; // per-input copy of the parser options; opts.stream holds the opened file
+  FILE *f; // NOTE(review): never assigned in this file; the stream is tracked via opts.stream
+  unsigned col_count; // cell count of the first row read; fixes how many cells this input contributes per output row
+  zsv_parser parser; // parser created by zsv_new_with_properties for this input
+  enum zsv_status zsv_status; // result of the most recent zsv_next_row call on parser
+};
+
+enum zsv_paste_status { // result codes; the value is also returned from the command entry point
+  zsv_paste_status_ok = 0, // success
+  zsv_paste_status_file, // an input file could not be opened
+  zsv_paste_status_memory, // allocation of an input record failed
+  zsv_paste_status_error // any other failure: writer or parser setup, or no inputs given
+};
+
+// zsv_paste_load_row: advance every input that still has rows; return number of inputs that a row was retrieved from
+static int zsv_paste_load_row(struct zsv_paste_input_file *inputs) {
+  int have_row = 0;
+  for(struct zsv_paste_input_file *pf = inputs; pf; pf = pf->next) {
+    if(pf->zsv_status == zsv_status_row) { // inputs whose last read did not yield a row are never read again
+      pf->zsv_status = zsv_next_row(pf->parser);
+      if(pf->zsv_status == zsv_status_row)
+        have_row++;
+    }
+  }
+  return have_row; // 0 means every input is exhausted
+}
+
+static void zsv_paste_print_row(zsv_csv_writer w, struct zsv_paste_input_file *inputs) { // write one combined row: each input contributes exactly col_count cells
+  char first = 1; // 1 only for the first cell written; presumably tells the writer to start a new row -- confirm against zsv_writer_cell
+  for(struct zsv_paste_input_file *pf = inputs; pf; pf = pf->next) {
+    unsigned int j = pf->zsv_status == zsv_status_row ? zsv_cell_count(pf->parser) : 0; // cells in the current row, or 0 once this input has no row
+    unsigned int k = pf->col_count; // width fixed by the first row of this input
+
+    for(unsigned int i = 0; i < j && i < k; i++) { // copy cells, dropping any beyond col_count
+      struct zsv_cell cell = zsv_get_cell(pf->parser, i);
+      zsv_writer_cell(w, first, cell.str, cell.len, cell.quoted);
+      first = 0;
+    }
+    for(unsigned int i = j; i < k; i++) { // pad short or missing rows with empty cells
+      zsv_writer_cell(w, first, (const unsigned char *)"", 0, 0);
+      first = 0;
+    }
+  }
+}
+
+static void zsv_paste_delete_input(struct zsv_paste_input_file *pf) { // release one input record: parser, file stream, then the record itself
+  if(pf->parser) // may be unset if parser creation failed in zsv_paste_add_input
+    zsv_delete(pf->parser);
+  if(pf->opts.stream) // the FILE opened in zsv_paste_add_input
+    fclose(pf->opts.stream);
+  free(pf);
+}
+
+// zsv_paste_add_input(): open fname, append it to the input list and read its first row; return zsv_paste_status_ok or an error status
+static enum zsv_paste_status zsv_paste_add_input(
+                                                 const char *fname,
+                                                 struct zsv_paste_input_file **next, // slot that receives the new record
+                                                 struct zsv_paste_input_file ***next_next, // out: set to the address of the new record's next pointer
+                                                 struct zsv_opts *opts, // copied into the new record
+                                                 struct zsv_prop_handler *custom_prop_handler,
+                                                 const char *opts_used
+                                                 ) {
+  FILE *f = fopen(fname, "rb");
+  if(!f) {
+    perror(fname);
+    return zsv_paste_status_file;
+  }
+  struct zsv_paste_input_file *pf = calloc(1, sizeof(*pf));
+  if(!pf) {
+    fclose(f); // nothing owns the stream yet, so close it here
+    return zsv_paste_status_memory;
+  }
+
+  pf->opts = *opts;
+  pf->opts.stream = f;
+  pf->fname = fname;
+  *next = pf; // linked in before parser setup, so the record stays reachable from the list if an error is returned below
+  *next_next = &pf->next;
+
+  if(zsv_new_with_properties(&pf->opts, custom_prop_handler, fname, opts_used, &pf->parser) != zsv_status_ok) {
+    fprintf(stderr, "Unable to initialize parser for %s\n", fname);
+    return zsv_paste_status_error;
+  } else {
+    if((pf->zsv_status = zsv_next_row(pf->parser)) == zsv_status_row)
+      pf->col_count = zsv_cell_count(pf->parser); // the first row fixes this input's column count
+    else
+      fprintf(stderr, "Warning: no data read from %s\n", fname); // not an error: col_count stays 0 and ok is returned
+  }
+  return zsv_paste_status_ok;
+}
+
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
+  struct zsv_paste_input_file *inputs = NULL; // head of the linked list of inputs
+  struct zsv_paste_input_file **next_input = &inputs; // slot where the next input will be linked
+  enum zsv_paste_status status = zsv_paste_status_ok;
+
+  struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
+  zsv_csv_writer writer = zsv_writer_new(&writer_opts);
+  if(!writer) {
+    status = zsv_printerr(zsv_paste_status_error, "Unable to create csv writer");
+    goto zsv_paste_done;
+  }
+
+  for(int i = 1; status == zsv_paste_status_ok && i < argc; i++) { // stops at the first input that fails to load
+    const char *arg = argv[i];
+    if(!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
+      zsv_paste_usage();
+      goto zsv_paste_done; // help wins: nothing is pasted, inputs added so far are released below
+    }
+
+    if(0) { // !strcmp(arg, "-x") || !strcmp(arg, "--my-arg")) { ...
+    } else {
+      status = zsv_paste_add_input(arg, next_input, &next_input, opts, custom_prop_handler, opts_used);
+    }
+  }
+
+  if(status == zsv_paste_status_ok) {
+    if(!inputs) {
+      fprintf(stderr, "Please specify at least one input file\n");
+      status = zsv_paste_status_error;
+      goto zsv_paste_done;
+    }
+
+    // print the first row of every input, which zsv_paste_add_input has already loaded
+    zsv_paste_print_row(writer, inputs);
+
+    // keep printing while at least one input still yields a row
+    while(zsv_paste_load_row(inputs))
+      zsv_paste_print_row(writer, inputs);
+  }
+
+ zsv_paste_done:
+  for(struct zsv_paste_input_file *next, *pf = inputs; pf; pf = next) {
+    next = pf->next; // read the link before the record is freed
+    zsv_paste_delete_input(pf);
+  }
+
+  if(opts->stream && opts->stream != stdin)
+    fclose(opts->stream);
+
+  zsv_writer_delete(writer);
+  return status; // the status enum value is returned as the int result; 0 is ok
+}

+ 4 - 3
csv.mod/zsv/app/pretty.c

@@ -553,6 +553,7 @@ static void zsv_pretty_destroy(struct zsv_pretty_data *data) {
 
 static struct zsv_pretty_data *zsv_pretty_init(struct zsv_pretty_opts *opts,
                                                struct zsv_opts *parser_opts,
+                                               struct zsv_prop_handler *custom_prop_handler,
                                                const char *input_path,
                                                const char *opts_used) {
   struct zsv_pretty_data *data = calloc(1, sizeof(*data));
@@ -571,7 +572,7 @@ static struct zsv_pretty_data *zsv_pretty_init(struct zsv_pretty_opts *opts,
 
   parser_opts->row_handler = zsv_pretty_row;
   parser_opts->ctx = data;
-  zsv_new_with_properties(parser_opts, input_path, opts_used, &data->parser);
+  zsv_new_with_properties(parser_opts, custom_prop_handler, input_path, opts_used, &data->parser);
 
   data->write = (size_t (*)(const void *, size_t, size_t, void *))fwrite;
   data->write_arg = opts->out ? opts->out : stdout;
@@ -596,7 +597,7 @@ static struct zsv_pretty_data *zsv_pretty_init(struct zsv_pretty_opts *opts,
   return data;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *parser_opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *parser_opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
     zsv_pretty_usage();
     return 0;
@@ -682,7 +683,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *pa
     rc = zsv_printerr(1, "Min column width cannot exceed max column width or max line width");
 
   parser_opts->stream = in;
-  struct zsv_pretty_data *h = zsv_pretty_init(&opts, parser_opts, input_path, opts_used);
+  struct zsv_pretty_data *h = zsv_pretty_init(&opts, parser_opts, custom_prop_handler, input_path, opts_used);
   if(!h)
     rc = 1;
   else {

+ 3 - 4
csv.mod/zsv/app/prop.c

@@ -455,9 +455,10 @@ static int merge_and_save_properties(const unsigned char *filepath,
   if(!props_fn)
     err = 1;
   else {
-    struct zsv_file_properties fp = { 0 };
     struct zsv_opts zsv_opts = { 0 };
-    err = zsv_cache_load_props((const char *)filepath, &zsv_opts, &fp, NULL);
+    struct zsv_prop_handler custom_prop_handler = { 0 };
+    struct zsv_file_properties fp = zsv_cache_load_props((const char *)filepath, &zsv_opts, &custom_prop_handler, NULL);
+    err = fp.stat;
     if(!err) {
       if(save && !overwrite) {
         if((fp.header_span_specified && d)
@@ -863,8 +864,6 @@ static int zsv_prop_execute_import(const char *dest, const char *src, unsigned c
   return err;
 }
 
-
-
 int ZSV_MAIN_NO_OPTIONS_FUNC(ZSV_COMMAND)(int m_argc, const char *m_argv[]) {
   int err = 0;
   char verbose = 0;

+ 103 - 0
csv.mod/zsv/app/schema/database-table.json

@@ -0,0 +1,103 @@
+{
+  "description": "Table that can be indexed and queried via SQL",
+  "type": "array",
+  "items": [
+    {
+      "description": "Table metadata",
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "description": {
+          "description": "description of this table",
+          "type": "string"
+        },
+        "indexes": {
+          "description": "Map of indexes on this table",
+          "type": "object",
+          "additionalProperties": {
+            "type": "object",
+            "additionalProperties": false,
+            "$comment": "to do: add auto-generated description to documentation",
+            "properties": {
+              "on": {
+                "type": "array",
+                "items": {
+                  "description": "expression to index on, usually a column name such as 'ID' or '[Row ID]'",
+                  "type": "string"
+                }
+              },
+              "unique": {
+                "type": "boolean"
+              }
+            }
+          }
+        },
+        "columns": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "additionalProperties": false,
+            "properties": {
+              "name": {
+                "description": "column name",
+                "type": "string"
+              },
+              "description": {
+                "description": "description of this column",
+                "type": "string"
+              },
+              "collate": {
+                "type": "string",
+                "enum": [
+                  "nocase",
+                  "rtrim",
+                  "binary"
+                ]
+              },
+              "datatype": {
+                "type": "string",
+                "enum": [
+                  "int",
+                  "real",
+                  "text"
+                ]
+              },
+              "normalize": {
+                "type": "boolean"
+              }
+            }
+          }
+        }
+      }
+    },
+    {
+      "description": "table data: array of rows",
+      "type": "array",
+      "items": {
+        "type": "array",
+        "items": {
+          "anyOf": [
+            {
+              "type": "null"
+            },
+            {
+              "type": "string"
+            },
+            {
+              "type": "number"
+            },
+            {
+              "type": "integer"
+            },
+            {
+              "type": "boolean"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        }
+      }
+    }
+  ]
+}

+ 45 - 14
csv.mod/zsv/app/select-pull.c

@@ -74,6 +74,8 @@ struct zsv_select_data {
   unsigned int header_name_count;
   unsigned char **header_names;
 
+  const char *prepend_header; // --prepend-header
+
   char header_finished;
 
   char embedded_lineend;
@@ -111,7 +113,9 @@ struct zsv_select_data {
   unsigned char any_clean:1;
 #define ZSV_SELECT_DISTINCT_MERGE 2
   unsigned char distinct:2; // 1 = ignore subsequent cols, ZSV_SELECT_DISTINCT_MERGE = merge subsequent cols (first non-null value)
-  unsigned char _:5;
+
+  unsigned char no_header:1;
+  unsigned char _:4;
 };
 
 enum zsv_select_column_index_selection_type {
@@ -138,10 +142,30 @@ static inline unsigned char *zsv_select_get_header_name(struct zsv_select_data *
 static inline char zsv_select_excluded_current_header_name(struct zsv_select_data *data, unsigned in_ix) {
   if(data->exclusion_count) {
     unsigned char *header_name = zsv_select_get_header_name(data, in_ix);
-    if(header_name) {
-      for(unsigned int i = 0; i < data->exclusion_count; i++)
-        if(!zsv_stricmp(header_name, data->exclusions[i]))
-          return 1;
+    if(data->use_header_indexes) {
+      for(unsigned int ix = 0; ix < data->exclusion_count; ix++) {
+        unsigned i, j;
+        switch(zsv_select_column_index_selection(data->exclusions[ix], &i, &j)) {
+        case zsv_select_column_index_selection_type_none:
+          // not expected!
+          break;
+        case zsv_select_column_index_selection_type_single:
+          if(in_ix + 1 == i) return 1;
+          break;
+        case zsv_select_column_index_selection_type_range:
+          if(i <= in_ix + 1 && in_ix + 1 <= j) return 1;
+          break;
+        case zsv_select_column_index_selection_type_lower_bounded:
+          if(i <= in_ix + 1) return 1;
+          break;
+        }
+      }
+    } else {
+      if(header_name) {
+        for(unsigned int i = 0; i < data->exclusion_count; i++)
+          if(!zsv_stricmp(header_name, data->exclusions[i]))
+            return 1;
+      }
     }
   }
   return 0;
@@ -446,12 +470,16 @@ static void zsv_select_data_row(struct zsv_select_data *data, zsv_parser p) {
 }
 
 static void zsv_select_print_header_row(struct zsv_select_data *data) {
+  if(data->no_header) // --no-header: do not output a header row
+    return;
+  zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header); // --prepend-header text (NULL when not given); presumably prefixed to each header cell written below -- confirm in writer.c
   if(data->prepend_line_number)
     zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
   for(unsigned int i = 0; i < data->output_cols_count; i++) {
     unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
     zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
   }
+  zsv_writer_cell_prepend(data->csv_writer, NULL); // reset the prefix once the header row is written; presumably so data rows are not prefixed
 }
 
 static void zsv_select_header_finish(struct zsv_select_data *data) {
@@ -511,10 +539,9 @@ const char *zsv_select_usage_msg[] = {
 #ifndef ZSV_CLI
   "  -v, --verbose: verbose output",
 #endif
-  "  -H, --head <n>: (head) only process the first n rows of data",
-  "                                selected from all rows in the input",
-  "  --header-row <header row>: insert the provided CSV as the first row",
-  "        e.g. --header-row 'colname1,colname2,\"my column 3\"'",
+  "  -H,--head <n>               : (head) only process the first n rows of data from all rows (including header) in the input",
+  "  --no-header                 : do not output a header row",
+  "  --prepend-header <value>    : prepend each column header with the given text value",
   "  -s, --search <value>: only output rows with at least one cell containing value",
   // to do: " -s, --search /<pattern>/modifiers: search on regex pattern; modifiers include 'g' (global) and 'i' (case-insensitive)",
   "  --sample-every <num of rows>: output a sample consisting of the first row, then every nth row",
@@ -574,7 +601,7 @@ static void zsv_select_cleanup(struct zsv_select_data *data) {
 /*  free(data->fixed.offsets); */
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
     zsv_select_usage();
     return zsv_status_ok;
@@ -671,6 +698,13 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
         stat = zsv_printerr(-1, "--sample-pct value should be a number between 0 and 100 (e.g. 1.5 for a sample of 1.5% of the data");
       else
         data.sample_pct = d;
+    } else if(!strcmp(argv[arg_i], "--prepend-header")) {
+      if(!(arg_i + 1 < argc))
+        stat = zsv_printerr(1, "%s option requires a value", argv[arg_i]); // %s must be given the option name
+      else
+        data.prepend_header = argv[++arg_i]; // consume the value; points into argv, not copied
+    } else if(!strcmp(argv[arg_i], "--no-header")) {
+      data.no_header = 1;
     } else if(!strcmp(argv[arg_i], "-H") || !strcmp(argv[arg_i], "--head")) {
       if(!(arg_i + 1 < argc && atoi(argv[arg_i+1]) >= 0))
         stat = zsv_printerr(1, "%s option value invalid: should be positive integer; got %s", argv[arg_i], arg_i + 1 < argc ? argv[arg_i+1] : "");
@@ -696,9 +730,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       arg_i++;
       if(!(arg_i < argc))
         stat = zsv_printerr(1, "%s option requires a value", argv[arg_i-1]);
-      else if(zsv_select_column_index_selection((const unsigned char *)argv[arg_i], NULL, NULL) ==
-              zsv_select_column_index_selection_type_none)
-        stat = zsv_printerr(1, "%s option: invalid value %s (expected number or number range e.g. 8 or 8-12)", argv[arg_i-1], argv[arg_i]);
       else
         zsv_select_add_exclusion(&data, argv[arg_i]);
     } else if(*argv[arg_i] == '-')
@@ -741,7 +772,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       stat = zsv_status_memory;
     else {
       zsv_parser parser;
-      if(zsv_new_with_properties(data.opts, input_path, opts_used, &parser)
+      if(zsv_new_with_properties(data.opts, custom_prop_handler, input_path, opts_used, &parser)
          == zsv_status_ok) {
         // all done with
         data.any_clean =

+ 45 - 14
csv.mod/zsv/app/select.c

@@ -80,6 +80,8 @@ struct zsv_select_data {
   unsigned int header_name_count;
   unsigned char **header_names;
 
+  const char *prepend_header; // --prepend-header
+
   char header_finished;
 
   char embedded_lineend;
@@ -114,7 +116,8 @@ struct zsv_select_data {
 #define ZSV_SELECT_DISTINCT_MERGE 2
   unsigned char distinct:2; // 1 = ignore subsequent cols, ZSV_SELECT_DISTINCT_MERGE = merge subsequent cols (first non-null value)
   unsigned char unescape:1;
-  unsigned char _:4;
+  unsigned char no_header:1; // --no-header
+  unsigned char _:3;
 };
 
 enum zsv_select_column_index_selection_type {
@@ -140,11 +143,31 @@ static inline unsigned char *zsv_select_get_header_name(struct zsv_select_data *
 
 static inline char zsv_select_excluded_current_header_name(struct zsv_select_data *data, unsigned in_ix) {
   if(data->exclusion_count) {
-    unsigned char *header_name = zsv_select_get_header_name(data, in_ix);
-    if(header_name) {
-      for(unsigned int i = 0; i < data->exclusion_count; i++)
-        if(!zsv_stricmp(header_name, data->exclusions[i]))
-          return 1;
+    if(data->use_header_indexes) {
+      for(unsigned int ix = 0; ix < data->exclusion_count; ix++) {
+        unsigned i, j;
+        switch(zsv_select_column_index_selection(data->exclusions[ix], &i, &j)) {
+        case zsv_select_column_index_selection_type_none:
+          // not expected!
+          break;
+        case zsv_select_column_index_selection_type_single:
+          if(in_ix + 1 == i) return 1;
+          break;
+        case zsv_select_column_index_selection_type_range:
+          if(i <= in_ix + 1 && in_ix + 1 <= j) return 1;
+          break;
+        case zsv_select_column_index_selection_type_lower_bounded:
+          if(i <= in_ix + 1) return 1;
+          break;
+        }
+      }
+    } else {
+      unsigned char *header_name = zsv_select_get_header_name(data, in_ix);
+      if(header_name) {
+        for(unsigned int i = 0; i < data->exclusion_count; i++)
+          if(!zsv_stricmp(header_name, data->exclusions[i]))
+            return 1;
+      }
     }
   }
   return 0;
@@ -458,12 +481,16 @@ static void zsv_select_data_row(void *ctx) {
 }
 
 static void zsv_select_print_header_row(struct zsv_select_data *data) {
+  if(data->no_header) // --no-header: do not output a header row
+    return;
+  zsv_writer_cell_prepend(data->csv_writer, (const unsigned char *)data->prepend_header); // --prepend-header text (NULL when not given); presumably prefixed to each header cell written below -- confirm in writer.c
   if(data->prepend_line_number)
     zsv_writer_cell_s(data->csv_writer, 1, (const unsigned char *)"#", 0);
   for(unsigned int i = 0; i < data->output_cols_count; i++) {
     unsigned char *header_name = zsv_select_get_header_name(data, data->out2in[i].ix);
     zsv_writer_cell_s(data->csv_writer, i == 0 && !data->prepend_line_number, header_name, 1);
   }
+  zsv_writer_cell_prepend(data->csv_writer, NULL); // reset the prefix once the header row is written; presumably so data rows are not prefixed
 }
 
 static void zsv_select_header_finish(struct zsv_select_data *data) {
@@ -527,8 +554,9 @@ const char *zsv_select_usage_msg[] = {
 #ifndef ZSV_CLI
   "  -v,--verbose                : verbose output",
 #endif
-  "  -H,--head <n>               : (head) only process the first n rows of data",
-  "                                selected from all rows in the input",
+  "  -H,--head <n>               : (head) only process the first n rows of data from all rows (including header) in the input",
+  "  --no-header                 : do not output a header row",
+  "  --prepend-header <value>    : prepend each column header with the given text value",
   "  -s,--search <value>         : only output rows with at least one cell containing"
   "                                value",
   // to do: " -s,--search /<pattern>/modifiers: search on regex pattern; modifiers include 'g' (global) and 'i' (case-insensitive)",
@@ -714,8 +742,7 @@ static enum zsv_status auto_detect_fixed_column_sizes(struct fixed *fixed, struc
   return stat;
 }
 
-
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
     zsv_select_usage();
     return zsv_status_ok;
@@ -818,6 +845,13 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
         stat = zsv_printerr(-1, "--sample-pct value should be a number between 0 and 100 (e.g. 1.5 for a sample of 1.5% of the data");
       else
         data.sample_pct = d;
+    } else if(!strcmp(argv[arg_i], "--prepend-header")) {
+      if(!(arg_i + 1 < argc))
+        stat = zsv_printerr(1, "%s option requires a value", argv[arg_i]); // %s must be given the option name
+      else
+        data.prepend_header = argv[++arg_i]; // consume the value; points into argv, not copied
+    } else if(!strcmp(argv[arg_i], "--no-header")) {
+      data.no_header = 1;
     } else if(!strcmp(argv[arg_i], "-H") || !strcmp(argv[arg_i], "--head")) {
       if(!(arg_i + 1 < argc && atoi(argv[arg_i+1]) >= 0))
         stat = zsv_printerr(1, "%s option value invalid: should be positive integer; got %s", argv[arg_i], arg_i + 1 < argc ? argv[arg_i+1] : "");
@@ -843,9 +877,6 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       arg_i++;
       if(!(arg_i < argc))
         stat = zsv_printerr(1, "%s option requires a value", argv[arg_i-1]);
-      else if(zsv_select_column_index_selection((const unsigned char *)argv[arg_i], NULL, NULL) ==
-              zsv_select_column_index_selection_type_none)
-        stat = zsv_printerr(1, "%s option: invalid value %s (expected number or number range e.g. 8 or 8-12)", argv[arg_i-1], argv[arg_i]);
       else
         zsv_select_add_exclusion(&data, argv[arg_i]);
     } else if(*argv[arg_i] == '-')
@@ -904,7 +935,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
     else {
       data.opts->row_handler = zsv_select_header_row;
       data.opts->ctx = &data;
-      if(zsv_new_with_properties(data.opts, input_path, opts_used, &data.parser)
+      if(zsv_new_with_properties(data.opts, custom_prop_handler, input_path, opts_used, &data.parser)
          == zsv_status_ok) {
         // all done with
         data.any_clean =

+ 30 - 17
csv.mod/zsv/app/serialize.c

@@ -38,6 +38,7 @@ struct serialize_data {
 
   struct output_header_name *header_names;
   unsigned int col_count;
+  unsigned int id_column_position;
 
   char *err_msg;
 
@@ -124,10 +125,10 @@ static void serialize_header(void *hook) {
     asprintf(&data->err_msg, "No columns read in first row; aborting\n");
   else {
     // write header
-    struct zsv_cell firstCell = zsv_get_cell(data->parser, 0);
-    if(firstCell.len == 0) {
-      firstCell.str = (unsigned char *)"(Blank)";
-      firstCell.len = strlen((const char *)firstCell.str);
+    struct zsv_cell idCell = zsv_get_cell(data->parser, data->id_column_position);
+    if(idCell.len == 0) {
+      idCell.str = (unsigned char *)"(Blank)";
+      idCell.len = strlen((const char *)idCell.str);
     }
     // if we have additional columns, find them and output their header names
     if(data->additional_columns) {
@@ -173,24 +174,26 @@ static void serialize_header(void *hook) {
 
     if(!data->err_msg) {
       // print the output table header
-      serialize_write_tuple(data, firstCell.str, firstCell.len, firstCell.quoted,
+      serialize_write_tuple(data, idCell.str, idCell.len, idCell.quoted,
                             (const unsigned char *)"Column", strlen("Column"),
                             (const unsigned char *)"Value", strlen("Value"), 0);
 
       if(data->use_column_position) {
         // process the header row as if it was a data row
         // output ID cell
-        struct zsv_cell cell = zsv_get_cell(data->parser, 0);
+        struct zsv_cell cell = zsv_get_cell(data->parser, data->id_column_position);
         serialize_write_tuple(data, (const unsigned char *)"Header", strlen("Header"), 0,
                               (const unsigned char *)"0", 1,
                               cell.str, cell.len, cell.quoted);
 
         // output other cells
-        for(unsigned i = 1; i < data->col_count; i++) {
-          cell = zsv_get_cell(data->parser, i);
-          serialize_write_tuple(data, (const unsigned char *)"Header", strlen("Header"), 0,
-                                data->header_names[i].str, data->header_names[i].len,
-                                cell.str, cell.len, cell.quoted);
+        for(unsigned i = 0; i < data->col_count; i++) {
+          if(i != data->id_column_position) {
+            cell = zsv_get_cell(data->parser, i);
+            serialize_write_tuple(data, (const unsigned char *)"Header", strlen("Header"), 0,
+                                  data->header_names[i].str, data->header_names[i].len,
+                                  cell.str, cell.len, cell.quoted);
+          }
         }
       }
     }
@@ -202,12 +205,13 @@ static void serialize_row(void *hook) {
   if(data->err_msg)
     return;
 
-  // the first cell is the row ID
-  struct zsv_cell id = zsv_get_cell(data->parser, 0);
+  // get the row id
+  struct zsv_cell id = zsv_get_cell(data->parser, data->id_column_position);
 
   unsigned j = zsv_cell_count(data->parser);
-  for(unsigned i = 1; i < j && i < data->col_count; i++)
-    serialize_cell(data, id, i);
+  for(unsigned i = 0; i < j && i < data->col_count; i++)
+    if(i != data->id_column_position)
+      serialize_cell(data, id, i);
 }
 
 const char *serialize_usage_msg[] =
@@ -222,6 +226,7 @@ const char *serialize_usage_msg[] =
    "  -f,--filter <value>    : only output cells with text that contains the given value",
    "  -e,--entire            : match the entire cell's content (only applicable with -f)",
    "  -i,--case-insensitive  : use case-insensitive match for the filter value",
+   "  --id-column <n>        : the 1-based position of the column to use as the identifier (default=1, max=2000)",
    "  -p,--position          : output column position instead of name; the second column",
    "                           will be position 1, and the first row will be treated as a",
    "                           normal data row",
@@ -275,7 +280,7 @@ static int serialize_append_additional_column(struct serialize_data *data, const
   return 1;
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
     serialize_usage();
     return 0;
@@ -301,6 +306,14 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
         }
       } else if(!strcmp(arg, "-i") || !strcmp(arg, "--case-insensitive"))
         data.filter.case_insensitive = 1;
+      else if(!strcmp(arg, "--id-column")) {
+        if(arg_i + 1 < argc && atoi(argv[arg_i+1]) > 0 && atoi(argv[arg_i+1]) <= 2000)
+          data.id_column_position = atoi(argv[++arg_i]) - 1;
+        else {
+          fprintf(stderr, "%s option requires a value between 1 and 2000\n", argv[arg_i]);
+          err = 1;
+        }
+      }
       else if(!strcmp(arg, "-p") || !strcmp(arg, "--position"))
         data.use_column_position = 1;
       else if(!strcmp(arg, "-e") || !strcmp(arg, "--entire"))
@@ -345,7 +358,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
     opts->ctx = &data;
     data.csv_writer = zsv_writer_new(&writer_opts);
-    if(zsv_new_with_properties(opts, input_path, opts_used, &data.parser) != zsv_status_ok
+    if(zsv_new_with_properties(opts, custom_prop_handler, input_path, opts_used, &data.parser) != zsv_status_ok
        || !data.csv_writer) {
       serialize_cleanup(&data);
       return 1;

+ 54 - 14
csv.mod/zsv/app/sql.c

@@ -33,23 +33,26 @@ struct string_list {
 const char *zsv_sql_usage_msg[] =
   {
    APPNAME ": run ad hoc sql on a CSV file",
+   "          or join multiple CSV files on one or more common column(s)",
    "",
 #ifdef NO_STDIN
    "Usage: " APPNAME " <filename> [filename ...] <sql | @file.sql>",
 #else
-   "Usage: " APPNAME " [filename, or - for stdin] [filename ...] <sql | @file.sql>",
+   "Usage: " APPNAME " [filename, or - for stdin] [filename ...] <sql | @file.sql | --join-indexes <N,...>>",
 #endif
    "  e.g. " APPNAME " myfile.csv \"select * from data\"",
    "  e.g. " APPNAME " myfile.csv myfile2.csv \"select * from data inner join data2\"",
+   "  e.g. " APPNAME " myfile.csv myfile2.csv --join-indexes 1,2",
    "",
    "Loads your CSV file into a table named 'data', then runs your sql, which must start with 'select '.",
    "If multiple files are specified, tables will be named data, data2, data3, ...",
    "",
    "Options:",
    "  --join-indexes <n1...>: specify one or more column names to join multiple files by",
-   "     each n is treated as an index in the first input file that determines a column"
+   "     each n is treated as an index in the first input file that determines a column",
    "     of the join. For example, if joining two files that, respectively, have columns",
-   "     A,B,C,D and X,B,C,A,Y then `--join-indexes 1,3` will join on columns A and C",
+   "     A,B,C,D and X,B,C,A,Y then `--join-indexes 1,3` will join on columns A and C.",
+   "     When using this option, do not include an sql statement",
    "  -b: output with BOM",
    "  -C, --max-cols <n>    : change the maximum allowable columns. must be > 0 and < 2000",
    "  -o <output filename>  : name of file to save output to",
@@ -133,8 +136,7 @@ static char is_select_sql(const char *s) {
                      );
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts,
-                               const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   /**
    * We need to pass the following data to the sqlite3 virtual table code:
    * a. zsv parser options indicated in the cmd line
@@ -160,9 +162,11 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
     // save current default opts so that we can restore them later
     struct zsv_opts original_default_opts = zsv_get_default_opts();
+    struct zsv_prop_handler original_default_custom_prop_handler = zsv_get_default_custom_prop_handler();
 
     // set parser opts that the sql module will get via zsv_get_default_opts()
     zsv_set_default_opts(*opts);
+    if(custom_prop_handler) zsv_set_default_custom_prop_handler(*custom_prop_handler);
 
     struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
     int err = 0;
@@ -288,7 +292,10 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
     if(err) {
       zsv_sql_cleanup(&data);
-      zsv_set_default_opts(original_default_opts); // restore default options
+      // default opts were replaced unconditionally earlier in this function, so always restore them
+      zsv_set_default_opts(original_default_opts);
+      if(custom_prop_handler) // the default handler was only replaced when one was passed in
+        zsv_set_default_custom_prop_handler(original_default_custom_prop_handler);
       return 1;
     }
 
@@ -352,10 +359,31 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
         // sql template:
         // select t1.*, t2.*, t3.* from t1 left join (select * from t2 group by a) t2 left join (select * from t3 group by a) t3 using(a);
         sqlite3_stmt *stmt = NULL;
-        rc = sqlite3_prepare_v2(db, "select * from data", -1, &stmt, NULL);
-        if(rc != SQLITE_OK)
-          fprintf(stderr, "%s:\n  %s\n (or bad CSV/utf8 input)\n\n", sqlite3_errstr(err), "select * from data");
-        else {
+        const char *prefix_search = NULL;
+        const char *prefix_end = NULL;
+        if(my_sql) { // sql was supplied: find its "from data" clause, which the generated join's from clause replaces further down
+          prefix_search = " from data ";
+          prefix_end = strstr(my_sql, prefix_search);
+          if(!prefix_end) {
+            prefix_search = " from data"; // no trailing space: only accepted at the very end of the statement
+            prefix_end = strstr(my_sql, prefix_search);
+            if(prefix_end && (prefix_end + strlen(prefix_search) != my_sql + strlen(my_sql)))
+              prefix_end = NULL;
+          }
+          if(!prefix_end || !prefix_search) {
+            err = 1;
+            fprintf(stderr, "Invalid sql: must contain 'from data'\n"); // newline-terminated like the other stderr messages here
+          }
+        }
+
+        if(!err) {
+          rc = sqlite3_prepare_v2(db, "select * from data", -1, &stmt, NULL);
+          if(rc != SQLITE_OK) {
+            fprintf(stderr, "%s:\n  %s\n (or bad CSV/utf8 input)\n\n", sqlite3_errstr(rc), "select * from data"); // report rc: err is always 0 here
+            err = 1;
+          }
+        }
+        if(!err) {
           struct string_list **next_joined_column_name = &data.join_column_names;
           int col_count = sqlite3_column_count(stmt);
           for(char *ix_str = data.join_indexes; !err && ix_str && *ix_str && *(++ix_str); ix_str = strchr(ix_str + 1, ',')) {
@@ -399,13 +427,24 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
             int i = 2;
             for(struct string_list *sl = data.more_input_filenames; sl; sl = sl->next, i++) {
               sqlite3_str_appendf(select_clause, ", data%i.*", i);
-              // left join (select * from t2 group by a) t2
+              // left join (select * from t2 group by a) t2 using(x,...)
               sqlite3_str_appendf(from_clause, " left join (select * from data%i group by %s) data%i", i,
                                   sqlite3_str_value(group_by_clause), i);
+              sqlite3_str_appendf(from_clause, " using (%s)",
+                                  sqlite3_str_value(group_by_clause));
             }
-            asprintf(&data.sql_dynamic, "select %s from %s using (%s)",
-                     sqlite3_str_value(select_clause), sqlite3_str_value(from_clause),
-                     sqlite3_str_value(group_by_clause));
+
+            if(!prefix_end || !prefix_search)
+              asprintf(&data.sql_dynamic, "select %s from %s",
+                       sqlite3_str_value(select_clause), sqlite3_str_value(from_clause));
+            else {
+              asprintf(&data.sql_dynamic, "%.*s from %s%s%s", (int)(prefix_end - my_sql), my_sql,
+                       sqlite3_str_value(from_clause),
+                       strlen(prefix_end + strlen(prefix_search)) ? " " : "",
+                       strlen(prefix_end + strlen(prefix_search)) ?
+                       prefix_end + strlen(prefix_search) : "");
+            }
+
             my_sql = data.sql_dynamic;
             if(opts->verbose)
               fprintf(stderr, "Join sql:\n%s\n", my_sql);
@@ -465,6 +504,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       free(tmpfn);
     }
     zsv_set_default_opts(original_default_opts); // restore default options
+    if(custom_prop_handler) zsv_set_default_custom_prop_handler(original_default_custom_prop_handler);
   }
   return 0;
 }

+ 4 - 3
csv.mod/zsv/app/stack.c

@@ -238,7 +238,7 @@ static void zsv_stack_data_row(void *ctx) {
   }
 }
 
-int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used) {
+int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used) {
   int err = 0;
   if(argc < 2) {
     zsv_stack_usage();
@@ -305,7 +305,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
 
     // to do: max_cell_size
     opts->stream = input->f;
-    if(zsv_new_with_properties(opts, input->fname, opts_used, &input->parser)
+    if(zsv_new_with_properties(opts, custom_prop_handler, input->fname, opts_used, &input->parser)
        != zsv_status_ok)
       data.err = 1;
     else {
@@ -347,6 +347,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       }
     }
     zsv_delete(input->parser);
+    input->parser = NULL;
   }
 
   // not necessary, but free up unused memory by resizing each input's output_column_map
@@ -381,7 +382,7 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
       rewind(input->f);
       input->headers_done = 0;
       opts->stream = input->f;
-      if(zsv_new_with_properties(opts, input->fname, opts_used, &input->parser)
+      if(zsv_new_with_properties(opts, custom_prop_handler, input->fname, opts_used, &input->parser)
          != zsv_status_ok)
         data.err = 1;
       else {

+ 54 - 8
csv.mod/zsv/app/test/Makefile

@@ -100,7 +100,7 @@ test: ${TESTS}
 test-prop:
 	EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test
 
-test-echo : test-echo1 test-echo-overwrite test-echo-eol
+test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv
 
 test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE}
 	@${TEST_INIT}
@@ -119,7 +119,10 @@ test-echo-overwrite: ${BUILD_DIR}/bin/zsv_echo${EXE}
 	@${PREFIX} $< ${TEST_DATA_DIR}/loans_1.csv --overwrite 'sqlite3://${TEST_DATA_DIR}/loans_1-overwrite.db?sql=select row,col,value from overwrites order by row,col' ${REDIRECT} ${TMP_DIR}/[email protected]
 	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
 
-
+test-echo-overwrite-csv: ${BUILD_DIR}/bin/zsv_echo${EXE} ${TEST_DATA_DIR}/loans_1-overwrite.csv
+	@${TEST_INIT}
+	@${PREFIX} $< ${TEST_DATA_DIR}/loans_1.csv --overwrite '${TEST_DATA_DIR}/loans_1-overwrite.csv' ${REDIRECT} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
 
 worldcitiespop_mil.csv:
 	curl -LOk 'https://burntsushi.net/stuff/worldcitiespop_mil.csv'
@@ -128,9 +131,9 @@ test-count test-count-pull: test-% : test-1-% test-2-%
 
 test-cli: ${CLI}
 	@${TEST_INIT}
-	@[ "${CLI}" = "" ] && echo 1>&2 'test-cli: missing CLI env var' && exit 1 || exit 0 
+	@[ "${CLI}" = "" ] && echo 1>&2 'test-cli: missing CLI env var' && exit 1 || exit 0
 	@$< help select 2>&1 > ${TMP_DIR}/[email protected]
-	@[ "`head -1 ${TMP_DIR}/[email protected]`" = "select: streaming CSV parser" ] && [ $$(( `cat ${TMP_DIR}/[email protected] | wc -l` )) = "35" ] && ${TEST_PASS} || ${TEST_FAIL}
+	@[ "`head -1 ${TMP_DIR}/[email protected]`" = "select: streaming CSV parser" ] && [ $$(( `cat ${TMP_DIR}/[email protected] | wc -l` )) = "36" ] && ${TEST_PASS} || ${TEST_FAIL}
 	@$< help count 2>&1 > ${TMP_DIR}/[email protected]
 	@[ "`head -1 ${TMP_DIR}/[email protected]`" = "Usage: count [options]" ] && [ $$(( `cat ${TMP_DIR}/[email protected] | wc -l` )) = "5" ] && ${TEST_PASS} || ${TEST_FAIL}
 
@@ -144,7 +147,7 @@ test-2-count test-2-count-pull: ${BUILD_DIR}/bin/zsv_count${EXE} ${TEST_DATA_DIR
 	@for x in 5000 5002 5004 5006 5008 5010 5013 5015 5017 5019 5021 5101 5105 5111 5113 5115 5117 5119 5121 5123 5125 5127 5129 5131 5211 5213 5215 5217 5311 5313 5315 5317 5413 5431 5433 5455 6133 ; do $< -r $$x ${TEST_DATA_DIR}/test/buffsplit_quote.csv ; done > ${TMP_DIR}/[email protected]
 	@${CMP} ${TMP_DIR}/[email protected] expected/test-2-count.out && ${TEST_PASS} || ${TEST_FAIL}
 
-test-select test-select-pull: test-% : test-n-% test-6-% test-7-% test-8-% test-9-% test-10-% test-quotebuff-% test-fixed-1-% test-fixed-2-% test-fixed-3-% test-fixed-4-% test-merge-%
+test-select test-select-pull: test-% : test-n-% test-6-% test-7-% test-8-% test-9-% test-10-% test-11-% test-12-% test-quotebuff-% test-fixed-1-% test-fixed-2-% test-fixed-3-% test-fixed-4-% test-merge-%
 
 test-merge-select test-merge-select-pull: test-merge-% : ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
@@ -159,17 +162,31 @@ test-quotebuff-select test-quotebuff-select-pull: test-quotebuff-% : ${BUILD_DIR
 
 test-n-select test-n-select-pull: test-n-% : ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
+
 	@${PREFIX} $< ${TEST_DATA_DIR}/loans_1.csv -u "?" -R 4 -d 2 ${REDIRECT} ${TMP_DIR}/test-n-$*.out
 	@${CMP} ${TMP_DIR}/test-n-$*.out expected/test-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+	@${PREFIX} $< ${TEST_DATA_DIR}/loans_1.csv -u "?" -H 10 -R 4 -d 2 -x 'Cash Out Amount' ${REDIRECT} ${TMP_DIR}/test-n-$*-x.out
+	@${CMP} ${TMP_DIR}/test-n-$*-x.out expected/test-select-x.out && ${TEST_PASS} || ${TEST_FAIL}
+
 	@${PREFIX} $< ${TEST_DATA_DIR}/test/embedded.csv -e 'X' ${REDIRECT} ${TMP_DIR}/test-2-$*.out
 	@${CMP} ${TMP_DIR}/test-2-$*.out expected/test-2-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
 	@${PREFIX} $< ${TEST_DATA_DIR}/test/embedded_dos.csv -e 'X' ${REDIRECT} ${TMP_DIR}/test-3-$*.out
 	@${CMP} ${TMP_DIR}/test-3-$*.out expected/test-3-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
 	@${PREFIX} $< ${TEST_DATA_DIR}/loans_1.csv -u "?" -R 4 -d 2 -N ${REDIRECT} ${TMP_DIR}/test-4-$*.out
 	@${CMP} ${TMP_DIR}/test-4-$*.out expected/test-4-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
 	@${PREFIX} $<  ${TEST_DATA_DIR}/quoted.csv -e 'x' ${REDIRECT} ${TMP_DIR}/test-5-$*.out
 	@${CMP} ${TMP_DIR}/test-5-$*.out expected/test-5-select.out && ${TEST_PASS} || ${TEST_FAIL}
 
+	@${PREFIX} $<  ${TEST_DATA_DIR}/stack1.csv --no-header -H 10 ${REDIRECT} ${TMP_DIR}/test-no-header-$*.out
+	@${CMP} ${TMP_DIR}/test-no-header-$*.out expected/test-no-header-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+	@${PREFIX} $<  ${TEST_DATA_DIR}/stack1.csv --prepend-header 'my,Header.' -H 10 ${REDIRECT} ${TMP_DIR}/test-prepend-header-$*.out
+	@${CMP} ${TMP_DIR}/test-prepend-header-$*.out expected/test-prepend-header-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
 test-6-select test-6-select-pull: test-6-% : ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
 	@${PREFIX} $< ${TEST_DATA_DIR}/test/tab.txt -t ${REDIRECT} ${TMP_DIR}/[email protected]
@@ -211,6 +228,15 @@ test-11-select test-11-select-pull: test-11-% : ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${PREFIX} (echo "A1,B1" | $< --header-row "column1,column2") > /tmp/[email protected]
 	@cmp /tmp/[email protected] expected/test-11-select.out && ${TEST_PASS} || ${TEST_FAIL}
 
+test-12-select test-12-select-pull: test-12-% : ${BUILD_DIR}/bin/zsv_%${EXE}
+	@${TEST_INIT}
+	@${PREFIX} $< -n -x 1-2 -x 6 -x 8-10 -x 13- ${TEST_DATA_DIR}/test/desc.csv > /tmp/$@.out
+	@${CMP} /tmp/$@.out expected/test-12-select.out && ${TEST_PASS} || ${TEST_FAIL}
+
+test-13-select test-13-select-pull: test-13-% : ${BUILD_DIR}/bin/zsv_%${EXE}
+	@${TEST_INIT}
+	@${PREFIX} [ "$$(echo 'aaa,bb,c\na,bb\nx,y,z' | $< --header-row-span 2 | head -1)" = "aaa a,bb bb,c" ] && ${TEST_PASS} || ${TEST_FAIL}
+
 test-fixed-1-select test-fixed-1-select-pull: ${BUILD_DIR}/bin/zsv_select${EXE}
 	@${TEST_INIT}
 	@${PREFIX} $< ${TEST_DATA_DIR}/fixed.csv --fixed 3,7,12,18,20,21,22 ${REDIRECT} ${TMP_DIR}/[email protected]
@@ -271,7 +297,7 @@ test-blank-leading-rows-4: ${BUILD_DIR}/bin/zsv_select${EXE}
 	@${PREFIX} $< -R 2 ${TEST_DATA_DIR}/test/blank-leading-rows.csv ${REDIRECT} ${TMP_DIR}/[email protected] 2>&1
 	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
 
-test-stack: test-stack1 test-stack2
+test-stack: test-stack1 test-stack2 test-stack3
 
 test-stack1: ${BUILD_DIR}/bin/zsv_stack${EXE}
 	@${TEST_INIT}
@@ -283,6 +309,12 @@ test-stack2: ${BUILD_DIR}/bin/zsv_stack${EXE}
 	@${PREFIX} $< ${TEST_DATA_DIR}/stack2-[12].csv ${REDIRECT} ${TMP_DIR}/[email protected]
 	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
 
+test-stack3: ${BUILD_DIR}/bin/zsv_stack${EXE}
+	@${TEST_INIT}
+	@echo 'a,b,c' > /tmp/1.csv
+	@${PREFIX} $< /tmp/1.csv ${REDIRECT1} ${TMP_DIR}/$@.out
+	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
+
 test-sql test-flatten : test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
 	@${TEST_INIT}
 	@( ( ! [ -s "${TEST_DATA_DIR}/test/$*.csv" ] ) && echo "No test input for $*") || \
@@ -325,8 +357,13 @@ test-serialize-additional: ${BUILD_DIR}/bin/zsv_serialize${EXE} ${TEST_DATA_DIR}
 	@(${PREFIX} $< < ${TEST_DATA_DIR}/test/serialize.csv -a 'Interest Paid Through Date' -a 'original INTEREST Only Term' ${REDIRECT1} ${TMP_DIR}/[email protected] && \
 	${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL})
 
+test-serialize-position-id: ${BUILD_DIR}/bin/zsv_serialize${EXE} ${TEST_DATA_DIR}/test/serialize.csv
+	@${TEST_INIT}
+	@(${PREFIX} $< --id-column 3 < ${TEST_DATA_DIR}/test/serialize.csv -a 'Interest Paid Through Date' -a 'original INTEREST Only Term' ${REDIRECT1} ${TMP_DIR}/[email protected] && \
+	${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL})
+
 
-test-serialize : test-%: ${BUILD_DIR}/bin/zsv_%${EXE} test-serialize-quoted test-serialize-additional
+test-serialize : test-%: ${BUILD_DIR}/bin/zsv_%${EXE} test-serialize-quoted test-serialize-additional test-serialize-position-id
 	@${TEST_INIT}
 	@( ( ! [ -s "${TEST_DATA_DIR}/test/$*.csv" ] ) && echo "No test input for $*") || \
 	(${PREFIX} $< < ${TEST_DATA_DIR}/test/$*.csv ${REDIRECT1} ${TMP_DIR}/[email protected] && \
@@ -338,7 +375,7 @@ test-serialize : test-%: ${BUILD_DIR}/bin/zsv_%${EXE} test-serialize-quoted test
 	@(${PREFIX} $< -p < ${TEST_DATA_DIR}/test/$*.csv ${REDIRECT1} ${TMP_DIR}/[email protected] && \
 	${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL})
 
-test-sql: test-sql2 test-sql3 test-sql4
+test-sql: test-sql2 test-sql3 test-sql4 test-sql5
 test-sql2: ${BUILD_DIR}/bin/zsv_sql${EXE}
 	@${TEST_INIT}
 	@echo ${ARGS-sql} > ${TMP_DIR}/[email protected]
@@ -355,6 +392,12 @@ test-sql4: ${BUILD_DIR}/bin/zsv_sql${EXE}
 	@(${PREFIX} $< ${TEST_DATA_DIR}/test/blank-leading-rows.csv -d 2 'select * from data' ${REDIRECT1} ${TMP_DIR}/[email protected])
 	@${CMP} ${TMP_DIR}/[email protected] expected/[email protected] && ${TEST_PASS} || ${TEST_FAIL}
 
+test-sql5: ${BUILD_DIR}/bin/zsv_sql${EXE} # test blank rows
+	@${TEST_INIT}
+	@echo 'a,b,c' > /tmp/1.csv
+	@(${PREFIX} $< /tmp/1.csv 'select * from data' ${REDIRECT1} ${TMP_DIR}/$@.out)
+	@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}
+
 
 ${BUILD_DIR}/bin/zsv_%${EXE}:
 	make -C .. $@ CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG}
@@ -443,3 +486,6 @@ test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE}
 
 	@(${PREFIX} $< compare/t1.csv compare/t7.csv compare/t3.csv --json-object -k c ${REDIRECT1} ${TMP_DIR}/$@.out8 && \
 	${CMP} ${TMP_DIR}/$@.out8 expected/$@.out8 && ${TEST_PASS} || ${TEST_FAIL})
+
+	@(${PREFIX} $< compare/t1.csv compare/t7.csv compare/t3.csv --print-key-colname -k c ${REDIRECT1} ${TMP_DIR}/$@.out9 && \
+	${CMP} ${TMP_DIR}/$@.out9 expected/$@.out9 && ${TEST_PASS} || ${TEST_FAIL})

+ 2 - 2
csv.mod/zsv/app/test/expected/test-compare.out5

@@ -2,5 +2,5 @@ C,Column,compare/t1.csv,compare/t5.csv,compare/t6-unsorted.csv
 C1,<key>,,,Missing
 C9-NONMATCHING,<key>,Missing,,Missing
 X2,B,B2,BB,BB
-C9-NONMATCHING,<key>,,,
-C1,<key>,,,
+C9-NONMATCHING,<key>,Missing,Missing,
+C1,<key>,Missing,Missing,

+ 6 - 0
csv.mod/zsv/app/test/expected/test-compare.out8

@@ -16,5 +16,11 @@
     "Column": "B",
     "compare/t7.csv": "",
     "compare/t3.csv": "BB"
+  },
+  {
+    "c": "X2",
+    "Column": "<key>",
+    "compare/t7.csv": "Missing",
+    "compare/t3.csv": "Missing"
   }
 ]

+ 5 - 0
csv.mod/zsv/app/test/expected/test-compare.out9

@@ -0,0 +1,5 @@
+c,Column,compare/t1.csv,compare/t7.csv,compare/t3.csv
+C1,B,X1,B1,X1
+C2,c,Missing,,
+C2,B,,,BB
+X2,c,,Missing,Missing

+ 1 - 0
csv.mod/zsv/app/utils/arg.c

@@ -73,6 +73,7 @@ void zsv_set_default_opts(struct zsv_opts opts) {
   *zsv_with_default_opts(0) = opts;
 }
 
+
 /**
  * str_array_index_of: return index in list, or size of list if not found
  */

+ 10 - 2
csv.mod/zsv/app/utils/cache.c

@@ -184,9 +184,17 @@ int zsv_modify_cache_file(const unsigned char *filepath,
       }
     }
     zsv_jq_delete(jqh);
-    if(!jqstat && zsv_replace_file(cache_tmp_fn, cache_fn))
-      err = zsv_printerr(-1, "Unable to save %s", cache_fn);
+
+    if(cache_data) {
+      fclose(cache_data);
+      cache_data = NULL;
+    }
     fclose(tmp);
+
+    if(!jqstat && zsv_replace_file(cache_tmp_fn, cache_fn)) {
+      err = zsv_printerr(-1, "Unable to save %s: ", cache_fn);
+      zsv_perror(NULL);
+    }
   }
 
   if(cache_data)

+ 2 - 0
csv.mod/zsv/app/utils/dirs-no-jq.c

@@ -0,0 +1,2 @@
+#define  ZSV_NO_JQ
+#include "dirs.c"

+ 7 - 3
csv.mod/zsv/app/utils/dirs.c

@@ -19,6 +19,7 @@
 #include <zsv/utils/string.h>
 #include <unistd.h> // unlink
 #include <sys/stat.h>
+#include <yajl_helper/yajl_helper.h>
 
 #if defined(_WIN32)
 #include <windows.h>
@@ -245,7 +246,7 @@ int zsv_foreach_dirent_aux(const char *dir_path,
   if(!dir_path)
     return 1;
 
-  if(max_depth > 0 && depth > max_depth)
+  if(max_depth > 0 && depth >= max_depth)
     return 0;
 
   DIR *dr;
@@ -269,11 +270,11 @@ int zsv_foreach_dirent_aux(const char *dir_path,
         char is_dir = h.stat.st_mode & S_IFDIR ? 1 : 0;
         h.is_dir = is_dir;
         if(handler)
-          err = handler(&h, depth + 1);
+          handler(&h, depth + 1);
 
         if(is_dir && !h.no_recurse)
           // recurse!
-          zsv_foreach_dirent_aux(tmp, depth + 1, max_depth, handler, ctx, verbose);
+          err = zsv_foreach_dirent_aux(tmp, depth + 1, max_depth, handler, ctx, verbose);
         free(tmp);
       }
     }
@@ -312,6 +313,9 @@ int zsv_remove_dir_recursive(const unsigned char *path) {
   return err;
 }
 
+#ifndef  ZSV_NO_JQ
 #include "dirs_to_json.c"
 
 #include "dirs_from_json.c"
+
+#endif

+ 5 - 1
csv.mod/zsv/app/utils/dirs_from_json.c

@@ -1,4 +1,4 @@
-#include <yajl_helper.h>
+#include <yajl_helper/yajl_helper.h>
 #include <zsv/utils/file.h>
 
 struct zsv_dir_from_json_ctx {
@@ -41,6 +41,10 @@ static int zsv_dir_from_json_map_key(struct yajl_helper_parse_state *st,
       asprintf(&fn, "%s%c%.*s", ctx->filepath_prefix, FILESLASH, (int)len, s);
     else
       asprintf(&fn, "%.*s", (int)len, s);
+
+    // if we have any backslashes, replace with fwd slash
+    if(fn)
+      for(int i = 0, j = strlen(fn); i < j; i++) if(fn[i] == '\\') fn[i] = '/';
     if(!fn) {
       errno = ENOMEM;
       perror(NULL);

+ 6 - 7
csv.mod/zsv/app/utils/file.c

@@ -16,7 +16,12 @@
 #include <zsv/utils/dirs.h>
 #include <zsv/utils/file.h>
 
-
+/**
+ * Get a temp file name. The returned value, if any, will have been allocated
+ * on the heap, and the caller should `free()`
+ *
+ * @param prefix string with which the resulting file name will be prefixed
+ */
 #if defined(_WIN32) || defined(WIN32) || defined(WIN)
 #include <windows.h>
 
@@ -37,12 +42,6 @@ char *zsv_get_temp_filename(const char *prefix) {
 }
 #else
 
-/**
- * Get a temp file name. The returned value, if any, will have been allocated
- * on the heap, and the caller should `free()`
- *
- * @param prefix string with which the resulting file name will be prefixed
- */
 char *zsv_get_temp_filename(const char *prefix) {
   char *s = NULL;
   char *tmpdir = getenv("TMPDIR");

+ 19 - 3
csv.mod/zsv/app/utils/os.c

@@ -6,8 +6,15 @@
  * https://opensource.org/licenses/MIT
  */
 
-#ifdef _WIN32
 #include <zsv/utils/os.h>
+#include <stdio.h>
+#ifndef _WIN32
+
+void zsv_perror(const char *s) {
+  perror(s);
+}
+
+#else
 #include <windows.h>
 #include <strsafe.h>
 
@@ -49,21 +56,24 @@ void zsv_win_to_unicode(const void *path, wchar_t *wbuf, size_t wbuf_len) {
   }
 }
 
-#include <stdio.h>
 #include <wchar.h>
 
 int zsv_replace_file(const void *src, const void *dest) {
   wchar_t wdest[PATH_MAX], wsrc[PATH_MAX];
+
   zsv_win_to_unicode(dest, wdest, ARRAY_SIZE(wdest));
   zsv_win_to_unicode(src, wsrc, ARRAY_SIZE(wsrc));
 
   if(ReplaceFileW(wdest, wsrc, NULL, REPLACEFILE_IGNORE_MERGE_ERRORS, 0, 0)) // success
     return 0;
-  else if(GetLastError() == 2) // file not found, could be target. use simple rename
+
+  if(GetLastError() == 2) // file not found, could be target. use simple rename
     return _wrename(wsrc, wdest); // returns 0 on success
+
   return 1; // fail
 }
 
+
 void zsv_win_printLastError() {
   DWORD dw = GetLastError();
   LPVOID lpMsgBuf;
@@ -88,4 +98,10 @@ void zsv_win_printLastError() {
   LocalFree(lpDisplayBuf);
 }
 
+void zsv_perror(const char *s) {
+  if(s && *s)
+    fwrite(s, 1, strlen(s), stderr);
+  zsv_win_printLastError(0);
+}
+
 #endif

+ 122 - 33
csv.mod/zsv/app/utils/prop.c

@@ -6,7 +6,56 @@
 #include <zsv/utils/prop.h>
 #include <zsv/utils/cache.h>
 #include <zsv/utils/file.h>
-#include <yajl_helper.h>
+#include <yajl_helper/yajl_helper.h>
+
+
+//////
+///////////
+#ifndef ZSVTLS
+# ifndef NO_THREADING
+#  define ZSVTLS _Thread_local
+# else
+#  define ZSVTLS
+# endif
+#endif
+/// see arg.c
+
+static struct zsv_prop_handler *zsv_with_default_custom_prop_handler(char mode) {
+  ZSVTLS static char zsv_default_custom_prop_handler_initd = 0;
+  ZSVTLS static struct zsv_prop_handler zsv_default_custom_prop_handler = { 0 };
+
+  switch(mode) {
+  case 'c': // clear
+    memset(&zsv_default_custom_prop_handler, 0, sizeof(zsv_default_custom_prop_handler));
+    zsv_default_custom_prop_handler_initd = 0;
+    break;
+  case 'g': // get
+    if(!zsv_default_custom_prop_handler_initd) {
+      zsv_default_custom_prop_handler_initd = 1;
+      zsv_default_custom_prop_handler.handler = NULL;
+      zsv_default_custom_prop_handler.ctx = NULL;
+    }
+    break;
+  }
+  return &zsv_default_custom_prop_handler;
+}
+
+ZSV_EXPORT
+void zsv_clear_default_custom_prop_handler() {
+  zsv_with_default_custom_prop_handler('c');
+}
+
+ZSV_EXPORT
+struct zsv_prop_handler zsv_get_default_custom_prop_handler() {
+  return *zsv_with_default_custom_prop_handler('g');
+}
+
+ZSV_EXPORT
+void zsv_set_default_custom_prop_handler(struct zsv_prop_handler custom_prop_handler) {
+  *zsv_with_default_custom_prop_handler(0) = custom_prop_handler;
+}
+///////////
+
 
 // to do: import these through a proper header
 static int zsv_properties_parse_process_value(struct yajl_helper_parse_state *st, struct json_value *value);
@@ -17,14 +66,43 @@ unsigned char *zsv_cache_filepath(const unsigned char *data_filepath,
 struct zsv_properties_parser {
   struct yajl_helper_parse_state st;
   yajl_status stat;
+
+  // queryable data
+  struct zsv_file_properties *fp;
+  struct zsv_opts *opts;
+  struct zsv_prop_handler *custom_prop_handler;
+  const unsigned char *filepath; // path to this properties file
+
 };
 
+const unsigned char *zsv_properties_parser_get_filepath(void *p_) {
+  struct zsv_properties_parser *p = p_;
+  return p ? p->filepath : NULL;
+}
+
+void *zsv_properties_parser_get_custom_ctx(void *p_) {
+  struct zsv_properties_parser *p = p_;
+  return p && p->custom_prop_handler ? p->custom_prop_handler->ctx : NULL;
+}
+
+struct zsv_opts *zsv_properties_parser_get_opts(void *p_) {
+  struct zsv_properties_parser *p = p_;
+  return p ? p->opts : NULL;
+}
+
 /**
  * Create a new properties parser
  */
-struct zsv_properties_parser *zsv_properties_parser_new(struct zsv_file_properties *fp) {
+struct zsv_properties_parser *zsv_properties_parser_new(const unsigned char *path,
+                                                        struct zsv_prop_handler *custom_prop_handler,
+                                                        struct zsv_file_properties *fp,
+                                                        struct zsv_opts *opts) {
   struct zsv_properties_parser *parser = calloc(1, sizeof(*parser));
+  parser->custom_prop_handler = custom_prop_handler;
   if(parser) {
+    parser->fp = fp;
+    parser->filepath = path;
+    parser->opts = opts;
     parser->stat =
       yajl_helper_parse_state_init(&parser->st, 32,
                                    NULL, // start_map,
@@ -33,7 +111,7 @@ struct zsv_properties_parser *zsv_properties_parser_new(struct zsv_file_properti
                                    NULL, // start_array,
                                    NULL, // end_array,
                                    zsv_properties_parse_process_value,
-                                   fp);
+                                   parser);
   }
   return parser;
 }
@@ -68,51 +146,49 @@ enum zsv_status zsv_properties_parser_destroy(struct zsv_properties_parser *pars
  *
  * @param data_filepath            required file path
  * @param opts (optional)          parser options to load
- * @param fp (optional)            parsed file properties
+ * @param custom_prop_handler (optional) handler for custom properties
  * @param cmd_opts_used (optional) cmd option codes to skip + warn if found
  * @return parsed file properties; the `stat` member is zsv_status_ok on success
  */
-enum zsv_status zsv_cache_load_props(const char *data_filepath,
-                                     struct zsv_opts *opts,
-                                     struct zsv_file_properties *fp,
-                                     const char *cmd_opts_used) {
+struct zsv_file_properties zsv_cache_load_props(const char *data_filepath,
+                                                struct zsv_opts *opts,
+                                                struct zsv_prop_handler *custom_prop_handler,
+                                                const char *cmd_opts_used) {
   // we need some memory to save the parsed properties:
   // they are collected in `tmp`, which is returned to the caller by value
   struct zsv_file_properties tmp = { 0 };
-  if(!fp)
-    fp = &tmp;
-  if(!(data_filepath && *data_filepath)) return 0; // e.g. input = stdin
+  if(!(data_filepath && *data_filepath)) return tmp; // e.g. input = stdin
 
-  enum zsv_status stat = zsv_status_ok;
+  struct zsv_file_properties *fp = &tmp;
   struct zsv_properties_parser *p = NULL;
   unsigned char *fn = zsv_cache_filepath((const unsigned char *)data_filepath,
                                          zsv_cache_type_property, 0, 0);
   if(!fn)
-    stat = zsv_status_memory;
+    tmp.stat = zsv_status_memory;
   else {
     FILE *f;
     int err;
     if(!zsv_file_readable((char *)fn, &err, &f)) {
       if(err != ENOENT) {
         perror((const char *)fn);
-        stat = zsv_status_error;
+        tmp.stat = zsv_status_error;
       }
     } else {
-      p = zsv_properties_parser_new(fp);
+      p = zsv_properties_parser_new(fn, custom_prop_handler, fp, opts);
       if(!p)
-        stat = zsv_status_memory;
+        tmp.stat = zsv_status_memory;
       else if(p->stat != yajl_status_ok)
-        stat = zsv_status_error;
+        tmp.stat = zsv_status_error;
       else {
         unsigned char buff[1024];
         size_t bytes_read;
         while((bytes_read = fread(buff, 1, sizeof(buff), f))) {
           if((p->stat = yajl_parse(p->st.yajl, buff, bytes_read)) != yajl_status_ok) {
-            stat = zsv_status_error;
+            tmp.stat = zsv_status_error;
             break;
           }
         }
-        if(stat == zsv_status_ok)
+        if(tmp.stat == zsv_status_ok)
           zsv_properties_parse_complete(p);
       }
       fclose(f);
@@ -120,7 +196,7 @@ enum zsv_status zsv_cache_load_props(const char *data_filepath,
     free(fn);
   }
 
-  if(stat == zsv_status_ok) {
+  if(tmp.stat == zsv_status_ok) {
     // warn if the loaded properties conflict with command-line options
     if(fp->skip_specified) {
       if(cmd_opts_used && strchr(cmd_opts_used, 'R'))
@@ -135,15 +211,16 @@ enum zsv_status zsv_cache_load_props(const char *data_filepath,
         opts->header_span = fp->header_span;
     }
   }
-  if(p && stat == zsv_status_ok
+  if(p && tmp.stat == zsv_status_ok
      && zsv_properties_parser_destroy(p) != zsv_status_ok
      )
-    stat = zsv_status_error;
-  return stat;
+    tmp.stat = zsv_status_error;
+  return tmp;
 }
 
 static int zsv_properties_parse_process_value(struct yajl_helper_parse_state *st, struct json_value *value) {
-  struct zsv_file_properties *fp = st->data;
+  struct zsv_properties_parser *parser = st->data;
+  struct zsv_file_properties *fp = parser->fp;
   if(st->level == 1) {
     const char *prop_name = yajl_helper_get_map_key(st, 0);
     unsigned int *target = NULL;
@@ -154,14 +231,23 @@ static int zsv_properties_parse_process_value(struct yajl_helper_parse_state *st
       target = &fp->header_span;
       fp->header_span_specified = 1;
     }
+
     if(!target) {
-      fprintf(stderr, "Unrecognized property: %s\n", prop_name);
-      fp->err = 1;
+      int rc = 1;
+      struct zsv_prop_handler *custom_prop_handler = parser->custom_prop_handler;
+      if(custom_prop_handler && custom_prop_handler->handler)
+        rc = custom_prop_handler->handler(parser, prop_name, value);
+      if(rc) {
+        fprintf(stderr, "Unrecognized property: %s\n", prop_name);
+        fp->stat = zsv_status_error;
+      }
     } else {
-      long long i = json_value_long(value, &fp->err);
-      if(fp->err || i < 0 || i > UINT_MAX)
+      int err = 0;
+      long long i = json_value_long(value, &err);
+      if(err || i < 0 || i > UINT_MAX) {
+        fp->stat = zsv_status_error;
         fprintf(stderr, "Invalid %s property value: should be an integer between 0 and %u", prop_name, UINT_MAX);
-      else
+      } else
         *target = (unsigned int) i;
     }
   }
@@ -174,18 +260,21 @@ static int zsv_properties_parse_process_value(struct yajl_helper_parse_state *st
  * specified input file. In the event that saved properties conflict with a
  * command-line option, the command-line option "wins" (the property value is
  * ignored), but a warning is printed
+ *
+ * optional `struct zsv_prop_handler` supports custom file property processing
  */
 enum zsv_status zsv_new_with_properties(struct zsv_opts *opts,
+                                        struct zsv_prop_handler *custom_prop_handler,
                                         const char *input_path,
                                         const char *opts_used,
                                         zsv_parser *handle_out
                                         ) {
-  enum zsv_status stat = zsv_status_ok;
   *handle_out = NULL;
   if(input_path) {
-    stat = zsv_cache_load_props(input_path, opts, NULL, opts_used);
-    if(stat != zsv_status_ok)
-      return stat;
+    struct zsv_file_properties fp =
+      zsv_cache_load_props(input_path, opts, custom_prop_handler, opts_used);
+    if(fp.stat != zsv_status_ok)
+      return fp.stat;
   }
   if((*handle_out = zsv_new(opts)))
     return zsv_status_ok;

+ 6 - 0
csv.mod/zsv/app/utils/string.c

@@ -16,6 +16,10 @@
 #include <zsv/utils/utf8.h>
 #include <zsv/utils/string.h>
 
+#ifdef ZSV_UTILS_STRING_STANDALONE
+#include "../../src/zsv_strencode.c"
+#endif
+
 #ifndef NO_UTF8PROC
 #include <utf8proc.h>
 
@@ -326,8 +330,10 @@ size_t zsv_strunescape_backslash(unsigned char *s, size_t len) {
   return j;
 }
 
+#ifndef ZSV_STRING_LIB_ONLY
 struct zsv_cell zsv_get_cell_trimmed(zsv_parser parser, size_t ix) {
   struct zsv_cell c = zsv_get_cell(parser, ix);
   c.str = (unsigned char *)zsv_strtrim(c.str, &c.len);
   return c;
 }
+#endif

+ 41 - 16
csv.mod/zsv/app/utils/writer.c

@@ -7,6 +7,7 @@
  */
 
 #include <zsv/utils/writer.h>
+#include <zsv/utils/compiler.h>
 #include <stdio.h>
 #include <ctype.h>
 #include <string.h>
@@ -34,7 +35,7 @@ void zsv_writer_set_default_opts(struct zsv_csv_writer_options opts) {
   zsv_csv_writer_default_opts = opts;
 }
 
-struct zsv_csv_writer_options zsv_writer_get_default_opts() {
+struct zsv_csv_writer_options zsv_writer_get_default_opts(void) {
   if(!zsv_writer_default_opts_initd) {
     zsv_writer_default_opts_initd = 1;
     zsv_csv_writer_default_opts.write = (size_t (*)(const void * restrict,  size_t,  size_t,  void * restrict))fwrite;
@@ -116,6 +117,8 @@ struct zsv_writer_data {
   void (*table_init)(void *);
   void *table_init_ctx;
 
+  const char *cell_prepend;
+
   unsigned char with_bom:1;
   unsigned char started:1;
   unsigned char _:6;
@@ -198,21 +201,10 @@ enum zsv_writer_status zsv_writer_delete(zsv_csv_writer w) {
   return zsv_writer_status_ok;
 }
 
-enum zsv_writer_status zsv_writer_cell(zsv_csv_writer w, char new_row,
+static inline
+enum zsv_writer_status zsv_writer_cell_aux(zsv_csv_writer w,
                                            const unsigned char *s, size_t len,
                                            char check_if_needs_quoting) {
-  if(!w) return zsv_writer_status_missing_handle;
-  if(!w->started) {
-    if(w->table_init)
-      w->table_init(w->table_init_ctx);
-    if(w->with_bom)
-      zsv_output_buff_write(&w->out, (const unsigned char *)"\xef\xbb\xbf", 3);
-    w->started = 1;
-  } else if(new_row)
-    zsv_output_buff_write(&w->out, (const unsigned char *)"\n", 1);
-  else
-    zsv_output_buff_write(&w->out, (const unsigned char *)",", 1);
-
   if(len) {
     if(check_if_needs_quoting) {
       unsigned char *quoted_s = zsv_csv_quote(s, len, w->buff, w->buffsize);
@@ -229,6 +221,39 @@ enum zsv_writer_status zsv_writer_cell(zsv_csv_writer w, char new_row,
   return zsv_writer_status_ok;
 }
 
+enum zsv_writer_status zsv_writer_cell(zsv_csv_writer w, char new_row,
+                                       const unsigned char *s, size_t len,
+                                       char check_if_needs_quoting) {
+  if(!w) return zsv_writer_status_missing_handle;
+  if(!w->started) {
+    if(w->table_init)
+      w->table_init(w->table_init_ctx);
+    if(w->with_bom)
+      zsv_output_buff_write(&w->out, (const unsigned char *)"\xef\xbb\xbf", 3);
+    w->started = 1;
+  } else if(new_row)
+    zsv_output_buff_write(&w->out, (const unsigned char *)"\n", 1);
+  else
+    zsv_output_buff_write(&w->out, (const unsigned char *)",", 1);
+
+  if(VERY_UNLIKELY(w->cell_prepend && *w->cell_prepend)) {
+    char *tmp = NULL;
+    asprintf(&tmp, "%s%.*s", w->cell_prepend, (int)len, s ? s : (const unsigned char *)"");
+    if(!tmp)
+      return zsv_writer_status_error; // zsv_writer_status_memory;
+    s = (const unsigned char *)tmp;
+    len = len + strlen(w->cell_prepend);
+    enum zsv_writer_status stat = zsv_writer_cell_aux(w, s, len, 1);
+    free(tmp);
+    return stat;
+  }
+  return zsv_writer_cell_aux(w, s, len, check_if_needs_quoting);
+}
+
+void zsv_writer_cell_prepend(zsv_csv_writer w, const unsigned char *s) {
+  w->cell_prepend = (const char *)s;
+}
+
 enum zsv_writer_status zsv_writer_cell_Lf(zsv_csv_writer w, char new_row, const char *fmt_spec,
                                               long double ldbl) {
   char s[128];
@@ -260,8 +285,8 @@ enum zsv_writer_status zsv_writer_cell_zu(zsv_csv_writer w, char new_row, size_t
 }
 
 enum zsv_writer_status zsv_writer_cell_s(zsv_csv_writer w, char new_row,
-                                             const unsigned char *s,
-                                             char check_if_needs_quoting) {
+                                         const unsigned char *s,
+                                         char check_if_needs_quoting) {
   return zsv_writer_cell(w, new_row, s, s ? strlen((const char *)s) : 0,
                            check_if_needs_quoting);
 }

+ 1 - 1
csv.mod/zsv/app/zsv_command_standalone.c

@@ -12,6 +12,6 @@ int main(int argc, const char *argv[]) {
   enum zsv_status stat = zsv_args_to_opts(argc, argv, &argc, argv, &opts, opts_used);
   if(stat != zsv_status_ok)
     return stat;
-  return ZSV_MAIN_FUNC(ZSV_COMMAND)(argc, argv, &opts, opts_used);
+  return ZSV_MAIN_FUNC(ZSV_COMMAND)(argc, argv, &opts, NULL, opts_used);
 #endif
 }

+ 3 - 1
csv.mod/zsv/app/zsv_main.h

@@ -12,9 +12,11 @@
 #define ZSV_MAIN_FUNC1(x) zsv_ ## x ## _main
 #define ZSV_MAIN_NO_OPTIONS_FUNC1(x) zsv_ ## x ## _main_no_options
 
+struct zsv_opts;
+
 /* macros for commands that use common zsv parsing */
 #define ZSV_MAIN_FUNC(x) ZSV_MAIN_FUNC1(x)
-#define ZSV_MAIN_DECL(x) int ZSV_MAIN_FUNC(x)(int argc, const char *argv[], struct zsv_opts *opts, const char *opts_used)
+#define ZSV_MAIN_DECL(x) int ZSV_MAIN_FUNC(x)(int argc, const char *argv[], struct zsv_opts *opts, struct zsv_prop_handler *custom_prop_handler, const char *opts_used)
 
 /* macros for commands that do not use common zsv parsing */
 #define ZSV_MAIN_NO_OPTIONS_FUNC(x) ZSV_MAIN_NO_OPTIONS_FUNC1(x)

+ 6 - 0
csv.mod/zsv/data/loans_1-overwrite.csv

@@ -0,0 +1,6 @@
+row,col,value
+8,3,hello3
+8,15,hello4
+8,99999,never
+10,2,hello2
+10,4,hello1

+ 26 - 0
csv.mod/zsv/include/zsv/common.h

@@ -53,6 +53,7 @@ struct zsv_cell {
   /**
    * bitfield values for `quoted` flags
    */
+#  define ZSV_PARSER_QUOTE_NONE     0 /* content does not need to be quoted */
 #  define ZSV_PARSER_QUOTE_UNCLOSED 1 /* only used internally by parser */
 #  define ZSV_PARSER_QUOTE_CLOSED   2 /* value was quoted */
 #  define ZSV_PARSER_QUOTE_NEEDED   4 /* value contains delimiter or dbl-quote */
@@ -66,6 +67,7 @@ struct zsv_cell {
    * quoting or escaping will be required
    */
   char quoted;
+  unsigned char overwritten:1;
 };
 
 typedef size_t (*zsv_generic_write)(const void * restrict,  size_t,  size_t,  void * restrict);
@@ -86,6 +88,25 @@ typedef int (*zsv_progress_callback)(void *ctx, size_t cumulative_row_count);
  * @param code exit code
  */
 typedef void (*zsv_completed_callback)(void *ctx, int code);
+
+/**
+ * Data can be "overwritten" on-the-fly by providing a source for
+ *   (row, column, value) tuples
+ * Supported source formats are CSV and SQLITE3
+ */
+enum zsv_overwrite_type {
+  zsv_overwrite_type_unknown = 0, // do not change
+  zsv_overwrite_type_none = 1, // do not change
+  zsv_overwrite_type_csv
+  // to do: zsv_overwrite_type_sqlite3
+};
+
+struct zsv_opt_overwrite {
+  enum zsv_overwrite_type type;
+  void *ctx;
+  int (*close_ctx)(void *);
+};
+
 # endif
 
 struct zsv_opts {
@@ -269,6 +290,11 @@ struct zsv_opts {
    */
   size_t max_rows;
 
+  /**
+   * Optional cell-level values that overwrite data returned to the caller by the API
+   */
+  struct zsv_opt_overwrite overwrite;
+
 # endif
 };
 

+ 6 - 0
csv.mod/zsv/include/zsv/utils/dirs.h

@@ -43,6 +43,12 @@ size_t zsv_get_executable_path(char* buff, size_t buffsize);
  */
 int zsv_dir_exists(const char *path);
 
+/**
+ * mkdir that works with UNC paths on Windows
+ * On failure, report the error with zsv_perror() instead of perror()
+ */
+int zsv_mkdir(const char *path);
+
 /**
  * Make a directory, as well as any intermediate dirs
  * return zero on success

+ 2 - 1
csv.mod/zsv/include/zsv/utils/os.h

@@ -9,10 +9,11 @@
 #ifndef ZSV_OS_H
 #define ZSV_OS_H
 
+void zsv_perror(const char *);
+
 #ifndef _WIN32
 # define zsv_replace_file(src, dest) (rename((const char *)src, (const char *)dest))
 
-
 #else
 
 # include <windows.h>

+ 37 - 8
csv.mod/zsv/include/zsv/utils/prop.h

@@ -1,10 +1,13 @@
 #ifndef ZSV_PROP_H
 #define ZSV_PROP_H
 
+#include <zsv/common.h>
+#include <yajl_helper/yajl_helper.h>
+
 struct zsv_file_properties {
+  enum zsv_status stat;
   unsigned int skip;
   unsigned int header_span;
-  int err;
 
   /* flags used by parser only to indicate whether property was specified */
   unsigned int skip_specified:1;
@@ -12,6 +15,26 @@ struct zsv_file_properties {
   unsigned int _:6;
 };
 
+struct zsv_prop_handler {
+  // handler should return 0 on success or non-zero on error. a non-zero return code will abort the property processing
+  int (*handler)(void *property_parser, const char *property_name, struct json_value *value);
+  void *ctx;
+};
+void *zsv_properties_parser_get_custom_ctx(void *property_parser);
+const unsigned char *zsv_properties_parser_get_filepath(void *property_parser);
+struct zsv_opts *zsv_properties_parser_get_opts(void *property_parser_);
+
+/**
+ * set or get default custom property handler
+ */
+void zsv_set_default_custom_prop_handler(struct zsv_prop_handler custom_prop_handler);
+
+struct zsv_prop_handler zsv_get_default_custom_prop_handler(void);
+
+void zsv_clear_default_custom_prop_handler(void);
+
+
+
 /**
  * Load cached file properties into a zsv_opts and/or zsv_file_properties struct
  * If cmd_opts_used is provided, then do not set any zsv_opts values, if the
@@ -20,20 +43,23 @@ struct zsv_file_properties {
  *
  * @param data_filepath            required file path
  * @param opts (optional)          parser options to load
- * @param fp (optional)            parsed file properties
+ * @param custom_prop              custom property handler
  * @param cmd_opts_used (optional) cmd option codes to skip + warn if found
- * @return zsv_status_ok on success
+ * @return struct zsv_file_properties, with .stat set to zsv_status_ok on success
  */
-enum zsv_status zsv_cache_load_props(const char *data_filepath,
-                                     struct zsv_opts *opts,
-                                     struct zsv_file_properties *fp,
-                                     const char *cmd_opts_used);
+struct zsv_file_properties zsv_cache_load_props(const char *data_filepath,
+                                                struct zsv_opts *opts,
+                                                struct zsv_prop_handler *custom_prop,
+                                                const char *cmd_opts_used);
 
 /**
  * Create a new properties parser
  */
 struct zsv_properties_parser;
-struct zsv_properties_parser *zsv_properties_parser_new(struct zsv_file_properties *fp);
+struct zsv_properties_parser *zsv_properties_parser_new(const unsigned char *path,
+                                                        struct zsv_prop_handler *custom_prop_handler,
+                                                        struct zsv_file_properties *fp,
+                                                        struct zsv_opts *opts);
 
 /**
  * Finished parsing
@@ -53,6 +79,7 @@ enum zsv_status zsv_properties_parser_destroy(struct zsv_properties_parser *pars
  * ignored), but a warning is printed
  *
  * @param opts       parser options. see `zsv_new()`
+ * @param cust_prop  optional custom file property handler
  * @param input_path path of file whose zsv properties should be loaded. this
  *                   param is used solely for loading properties and has no
  *                   impact on the data that is actually parsed, which is
@@ -63,6 +90,7 @@ enum zsv_status zsv_properties_parser_destroy(struct zsv_properties_parser *pars
  * @param handle_out returns zsv parser handle, or NULL on fail
  */
 enum zsv_status zsv_new_with_properties(struct zsv_opts *opts,
+                                        struct zsv_prop_handler *custom_prop,
                                         const char *input_path,
                                         const char *opts_used,
                                         zsv_parser *handle_out
@@ -79,6 +107,7 @@ enum zsv_status zsv_new_with_properties(struct zsv_opts *opts,
  * @param max_depth   : maximum depth of any property file. if is_prop_file was NULL,
  *                      max_depth is set to 1
  */
+
 #include "dirs.h"
 
 struct zsv_dir_filter; /* opaque structure for internal use */

+ 9 - 1
csv.mod/zsv/include/zsv/utils/writer.h

@@ -24,7 +24,7 @@ struct zsv_csv_writer_options {
 };
 
 void zsv_writer_set_default_opts(struct zsv_csv_writer_options opts);
-struct zsv_csv_writer_options zsv_writer_get_default_opts();
+struct zsv_csv_writer_options zsv_writer_get_default_opts(void);
 
 enum zsv_writer_status {
   zsv_writer_status_ok = 0,
@@ -74,4 +74,12 @@ enum zsv_writer_status zsv_writer_cell_Lf(zsv_csv_writer w, char new_row,
 // write a blank cell
 enum zsv_writer_status zsv_writer_cell_blank(zsv_csv_writer w, char new_row);
 
+/**
+ * set a value to be prepended to the next cell value that is written
+ *
+ * @param w  handle to the writer
+ * @param s  text value to write (can be NULL)
+ */
+void zsv_writer_cell_prepend(zsv_csv_writer w, const unsigned char *s);
+
 #endif

+ 3 - 0
csv.mod/zsv/scripts/ci-freebsd-setup.sh

@@ -0,0 +1,3 @@
+#!/bin/sh
+
+sudo pkg install -y tree zip git autotools gmake lang/gcc

+ 8 - 7
csv.mod/zsv/src/Makefile

@@ -112,11 +112,14 @@ endif
 
 
 help:
-	@echo "${MAKE} [CONFIGFILE=config.mk] [NO_UTF8_CHECK=1] [VERBOSE=1] [LIBDIR=${LIBDIR}] [INCLUDEDIR=${INCLUDEDIR}] [LIB_SUFFIX=]"
+	@echo "Make options:"
+	@echo "  `basename ${MAKE}` build|install|uninstall|clean"
+	@echo
+	@echo "Optional make variables:"
+	@echo "  [CONFIGFILE=config.mk] [NO_UTF8_CHECK=1] [VERBOSE=1] [LIBDIR=${LIBDIR}] [INCLUDEDIR=${INCLUDEDIR}] [LIB_SUFFIX=]"
+	@echo
 
-lib: ${LIBZSV}
-
-all: lib
+build: ${LIBZSV}
 
 ${LIBZSV}: ${ZSV_OBJ}
 	@mkdir -p `dirname "$@"`
@@ -126,13 +129,11 @@ ${LIBZSV}: ${ZSV_OBJ}
 	$(AR) -t $@ # check it is there
 	@echo Built $@
 
-
 install: ${LIBZSV_INSTALL}
 	@mkdir -p  $(INCLUDEDIR)
 	@cp -pR ../include/* $(INCLUDEDIR)
 	@echo "include files copied to $(INCLUDEDIR)"
 
-
 ${LIBZSV_INSTALL}: ${LIBZSV}
 	@mkdir -p `dirname "$@"`
 	@cp -p ${LIBZSV} "$@"
@@ -145,7 +146,7 @@ uninstall:
 clean:
 	rm -rf ${BUILD_DIR}/objs ${LIBZSV}
 
-.PHONY: all install clean lib ${LIBZSV_INSTALL}
+.PHONY: build install uninstall clean  ${LIBZSV_INSTALL}
 
 ${BUILD_DIR}/objs/zsv.o: zsv.c zsv_internal.c
 	@mkdir -p `dirname "$@"`

+ 23 - 48
csv.mod/zsv/src/zsv.c

@@ -11,12 +11,13 @@
 #endif
 
 #include "zsv.h"
-#include <zsv/utils/utf8.h>
 #include <zsv/utils/compiler.h>
 #ifdef ZSV_EXTRAS
 #include <zsv/utils/arg.h>
 #endif
 
+static struct zsv_cell zsv_get_cell_1(zsv_parser parser, size_t ix);
+static struct zsv_cell zsv_get_cell_with_overwrite(zsv_parser parser, size_t col_ix);
 #include "zsv_internal.c"
 
 #ifndef ZSV_VERSION
@@ -28,45 +29,7 @@ const char *zsv_lib_version(void) {
   return ZSV_VERSION;
 }
 
-/**
- * Ensure valid UTF8 encoding by, if needed, replacing malformed bytes
- */
-ZSV_EXPORT
-size_t zsv_strencode(unsigned char *s, size_t n, unsigned char replace,
-                     int (*malformed_handler)(void *, const unsigned char *s, size_t n, size_t offset), void *handler_ctx) {
-  size_t new_len = 0;
-  int clen;
-  for(size_t i2 = 0; i2 < n; i2 += (size_t)clen) {
-    clen = ZSV_UTF8_CHARLEN(s[i2]);
-    if(LIKELY(clen == 1))
-      s[new_len++] = s[i2];
-    else if(UNLIKELY(clen < 0) || UNLIKELY(i2 + clen >= n)) {
-      if(malformed_handler)
-        malformed_handler(handler_ctx, s, n, new_len);
-      if(replace)
-        s[new_len++] = replace;
-      clen = 1;
-    } else { /* might be valid multi-byte utf8; check */
-      unsigned char valid_n;
-      for(valid_n = 1; valid_n < clen; valid_n++)
-        if(!ZSV_UTF8_SUBSEQUENT_CHAR_OK(s[i2 + valid_n]))
-          break;
-      if(valid_n == clen) { /* valid_n utf8; copy it */
-        memmove(s + new_len, s + i2, clen);
-        new_len += clen;
-      } else { /* invalid; valid_n smaller than expected */
-        if(malformed_handler)
-          malformed_handler(handler_ctx, s, n, new_len);
-        if(replace) {
-          memset(s + new_len, replace, valid_n);
-          new_len += valid_n;
-        }
-        clen = valid_n;
-      }
-    }
-  }
-  return new_len; // new length
-}
+#include "zsv_strencode.c"
 
 /**
  * When we parse a chunk, if it was not the first parse call, we might have a partial
@@ -271,16 +234,20 @@ char zsv_quoted(zsv_parser parser) {
   return parser->quoted || parser->opts.no_quotes;
 }
 
-// to do: benchmark returning zsv_cell struct vs just a zsv_cell pointer
-ZSV_EXPORT
-struct zsv_cell zsv_get_cell(zsv_parser parser, size_t ix) {
-  if(ix < parser->row.used)
+static struct zsv_cell zsv_get_cell_1(zsv_parser parser, size_t ix) {
+  if(VERY_LIKELY(ix < parser->row.used))
     return parser->row.cells[ix];
 
-  struct zsv_cell c = { 0, 0, 0 };
+  struct zsv_cell c = { 0, 0, 0, 0 };
   return c;
 }
 
+// to do: benchmark returning zsv_cell struct vs just a zsv_cell pointer
+ZSV_EXPORT
+struct zsv_cell zsv_get_cell(zsv_parser parser, size_t ix) {
+  return parser->get_cell(parser, ix);
+}
+
 /**
  * `zsv_get_cell_len()` is not needed in most cases, but may be useful in
  * restrictive cases such as when calling from Javascript into wasm
@@ -294,7 +261,7 @@ size_t zsv_get_cell_len(zsv_parser parser, size_t ix) {
 
 ZSV_EXPORT
 unsigned char *zsv_get_cell_str(zsv_parser parser, size_t ix) {
-  struct zsv_cell c = zsv_get_cell(parser, ix);
+  struct zsv_cell c = zsv_get_cell_1(parser, ix);
   return c.len ? c.str : NULL;
 }
 
@@ -388,14 +355,14 @@ enum zsv_status zsv_finish(struct zsv_scanner *scanner) {
     }
 
     if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED)
-       && scanner->partial_row_length > scanner->cell_start + 1) {
+       && scanner->partial_row_length > scanner->cell_start) {
       int quote = '"';
       scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED;
       scanner->quoted -= ZSV_PARSER_QUOTE_UNCLOSED;
       if(scanner->last == quote)
         scanner->quote_close_position = scanner->partial_row_length - scanner->cell_start;
       else {
-        scanner->quote_close_position = scanner->partial_row_length - scanner->cell_start + 1;
+        scanner->quote_close_position = scanner->partial_row_length - scanner->cell_start;
         scanner->scanned_length++;
       }
     }
@@ -431,6 +398,14 @@ enum zsv_status zsv_delete(zsv_parser parser) {
     free(parser->fixed.offsets);
     collate_header_destroy(&parser->collate_header);
     free(parser->pull.regs);
+
+#ifdef ZSV_EXTRAS
+  if(parser->overwrite.ctx && parser->overwrite.close_ctx)
+    parser->overwrite.close_ctx(parser->overwrite.ctx);
+  if(parser->overwrite.reader && parser->overwrite.close_reader)
+    parser->overwrite.close_reader(parser->overwrite.reader);
+#endif
+
     free(parser);
   }
   return zsv_status_ok;

+ 161 - 9
csv.mod/zsv/src/zsv_internal.c

@@ -89,6 +89,31 @@ struct zsv_scan_fixed_regs {
   char xx; // to do
 };
 
+#ifdef ZSV_EXTRAS
+#include <inttypes.h>
+
+struct zsv_overwrite_data {
+  struct zsv_cell row;
+  struct zsv_cell col;
+  struct zsv_cell val;
+};
+
+struct zsv_overwrite {
+  void (*next)(struct zsv_overwrite *overwrite, struct zsv_overwrite_data *data);
+
+  size_t row_ix; // 0-based
+  size_t col_ix; // 0-based
+  struct zsv_cell val;
+  char have; // 1 = we have unprocessed overwrites
+
+  void *ctx;
+  int (*close_ctx)(void *);
+
+  void *reader;
+  int (*close_reader)(void *);
+};
+#endif
+
 struct zsv_scanner {
   char last;
   struct {
@@ -143,6 +168,8 @@ struct zsv_scanner {
   } fixed;
 
   struct collate_header *collate_header;
+  size_t data_row_count; /* 0 = in header row; 1 = first data row */
+  struct zsv_cell (*get_cell)(zsv_parser parser, size_t ix);
 
 #ifdef ZSV_EXTRAS
   struct {
@@ -162,6 +189,10 @@ struct zsv_scanner {
     size_t row_used;
     unsigned char now;
   } pull;
+
+#ifdef ZSV_EXTRAS
+  struct zsv_overwrite overwrite;
+#endif
 };
 
 void collate_header_destroy(struct collate_header **chp) {
@@ -189,7 +220,7 @@ static int collate_header_append(struct zsv_scanner *scanner, struct collate_hea
   size_t this_row_size = 0;
   size_t column_count = zsv_cell_count(scanner);
   for(size_t i = 0, j = column_count; i < j; i++) {
-    struct zsv_cell c = zsv_get_cell(scanner, i);
+    struct zsv_cell c = zsv_get_cell_1(scanner, i);
     if(c.len)
       this_row_size += c.len + 1; // +1: terminating null or delim
   }
@@ -217,7 +248,7 @@ static int collate_header_append(struct zsv_scanner *scanner, struct collate_hea
   ch->buff.used += this_row_size;
   ch->buff.buff = new_row;
   for(size_t i = column_count; i > 0; i--) {
-    struct zsv_cell c = zsv_get_cell(scanner, i-1);
+    struct zsv_cell c = zsv_get_cell_1(scanner, i-1);
     // copy new row's cell value to end
     if(c.len) {
       memcpy(new_row + new_row_end - c.len - 1, c.str, c.len);
@@ -308,7 +339,7 @@ __attribute__((always_inline)) static inline void cell_dl(struct zsv_scanner * s
     scanner->opts.cell_handler(scanner->opts.ctx, s, n);
   if(VERY_LIKELY(scanner->row.used < scanner->row.allocated)) {
     struct zsv_row *row = &scanner->row;
-    struct zsv_cell c = { s, n, scanner->opts.no_quotes ? 1 : scanner->quoted };
+    struct zsv_cell c = { s, n, scanner->opts.no_quotes ? 1 : scanner->quoted, 0 };
     row->cells[row->used++] = c;
   } else
     scanner->row.overflow++;
@@ -323,8 +354,11 @@ __attribute__((always_inline)) static inline enum zsv_status row_dl(struct zsv_s
             scanner->row.allocated + scanner->row.overflow, scanner->row.allocated);
     scanner->row.overflow = 0;
   }
-  if(VERY_LIKELY(scanner->opts.row_handler != NULL))
+  if(VERY_LIKELY(scanner->opts.row_handler != NULL)) // TO DO: disallow row_handler to be null; if null, set to dummy
     scanner->opts.row_handler(scanner->opts.ctx);
+  // Note: scanner->data_row_count will be incremented AFTER this call
+  //       in order to accommodate pull parsing, in which case incrementing here
+  //       would be too early
 # ifdef ZSV_EXTRAS
   scanner->progress.cum_row_count++;
   if(VERY_UNLIKELY(scanner->opts.progress.rows_interval
@@ -481,7 +515,7 @@ static void apply_callbacks(struct zsv_scanner *scanner) {
     // call the user-provided cell() callback on each cell
     unsigned char saved_quoted = scanner->quoted;
     for(size_t i = 0, j = zsv_cell_count(scanner); i < j; i++) {
-      struct zsv_cell c = zsv_get_cell(scanner, i);
+      struct zsv_cell c = zsv_get_cell_1(scanner, i);
       scanner->quoted = c.quoted;
       scanner->opts.cell_handler(scanner->opts.ctx, c.str, c.len);
     }
@@ -530,6 +564,14 @@ static void collate_header_row(void *ctx) {
     --scanner->opts.header_span;
 
     // save this row
+
+    // first, make sure this row has at least as many cells as the largest prior row
+    if(scanner->collate_header) {
+      for(size_t i = zsv_cell_count(scanner); i < scanner->row.allocated && i < scanner->collate_header->column_count; i++)
+        memset(&scanner->row.cells[i], 0, sizeof(scanner->row.cells[i]));
+      scanner->row.used = scanner->collate_header->column_count;
+    }
+
     if(collate_header_append(scanner, &scanner->collate_header))
       scanner->abort = 1;
   }
@@ -571,6 +613,11 @@ static void set_callbacks(struct zsv_scanner *scanner) {
     scanner->opts.cell_handler = NULL;
     scanner->opts.ctx = scanner;
   } else {
+    if(scanner->overwrite.have)
+      scanner->get_cell = zsv_get_cell_with_overwrite;
+    else
+      scanner->get_cell = zsv_get_cell_1;
+    scanner->data_row_count = 0;
     scanner->opts.row_handler = scanner->opts_orig.row_handler;
     scanner->opts.cell_handler = scanner->opts_orig.cell_handler;
     scanner->opts.ctx = scanner->opts_orig.ctx;
@@ -580,12 +627,115 @@ static void set_callbacks(struct zsv_scanner *scanner) {
 static void zsv_throwaway_row(void *ctx) {
   struct zsv_scanner *scanner = ctx;
   if(scanner->opts.overflow_row_handler != NULL) {
-    if(zsv_cell_count(scanner) > 1 || zsv_get_cell(scanner, 0).len > 0)
+    if(zsv_cell_count(scanner) > 1 || zsv_get_cell_1(scanner, 0).len > 0)
       scanner->opts.overflow_row_handler(ctx);
   }
   set_callbacks(ctx);
 }
 
+#ifdef ZSV_EXTRAS
+static int zsv_delete_v(void *p) {
+  return zsv_delete((zsv_parser)p);
+}
+
+static void zsv_next_overwrite_csv(struct zsv_overwrite *overwrite,
+                                   struct zsv_overwrite_data *data) {
+  // row, column, value
+  data->row = zsv_get_cell_1(overwrite->reader, 0);
+  data->col = zsv_get_cell_1(overwrite->reader, 1);
+  data->val = zsv_get_cell_1(overwrite->reader, 2);
+}
+
+// TO DO: consolidate with zsv_echo_get_next_overwrite()
+static void zsv_next_overwrite(struct zsv_overwrite *overwrite) {
+  if(overwrite->have) {
+    if(zsv_next_row(overwrite->reader) != zsv_status_row)
+      overwrite->have = 0;
+    else {
+      struct zsv_overwrite_data data;
+      overwrite->next(overwrite, &data);
+      if(data.row.len && data.col.len) {
+        char *end = (char *)(data.row.str + data.row.len);
+        char **endp = &end;
+        overwrite->row_ix = strtoumax((char *)data.row.str, endp, 10);
+        end = (char *)(data.col.str + data.col.len);
+        overwrite->col_ix = strtoumax((char *)data.col.str, endp, 10);
+        overwrite->val = data.val;
+      } else {
+        overwrite->row_ix = 0;
+        overwrite->col_ix = 0;
+        overwrite->val.len = 0;
+      }
+    }
+  }
+}
+
+static enum zsv_status zsv_init_overwrites(zsv_parser parser, struct zsv_opt_overwrite *overwrite_opts) {
+  if(overwrite_opts->type <= zsv_overwrite_type_none)
+    return zsv_status_ok;
+  struct zsv_overwrite *overwrite = &parser->overwrite;
+  switch(overwrite_opts->type) {
+  case zsv_overwrite_type_csv:
+    {
+      struct zsv_opts opts = { 0 };
+      overwrite->ctx = opts.stream = overwrite_opts->ctx;
+      overwrite->close_ctx = overwrite_opts->close_ctx;
+      if(!(overwrite->reader = zsv_new(&opts)))
+        return zsv_status_memory;
+      overwrite->close_reader = zsv_delete_v;
+      overwrite->next = zsv_next_overwrite_csv;
+    }
+    break;
+  default:
+    fprintf(stderr, "Unrecognized overwrite type\n");
+    return zsv_status_error;
+  }
+
+  overwrite->have = 0;
+  if(zsv_next_row(overwrite->reader) == zsv_status_row) {
+    // to do: check that column names are row, col, value
+    struct zsv_overwrite_data data;
+    overwrite->next(overwrite, &data);
+    if(data.row.len < 3 || memcmp(data.row.str, "row", 3)
+       || data.col.len < 3 || memcmp(data.col.str, "col", 3)
+       || data.val.len < 3 || memcmp(data.val.str, "val", 3))
+      fprintf(stderr, "Warning! overwrite expects 'row,col,value' header, got '%.*s,%.*s,%.*s'\n",
+              (int)data.row.len, data.row.str,
+              (int)data.col.len, data.col.str,
+              (int)data.val.len, data.val.str
+              );
+    overwrite->have = 1;
+    zsv_next_overwrite(overwrite);
+  }
+  return overwrite->have ? zsv_status_ok : zsv_status_error;
+}
+
+static int zsv_have_overwrite(zsv_parser parser, size_t row_ix, size_t col_ix) {
+  struct zsv_overwrite *overwrite = &parser->overwrite;
+  while(overwrite->have && overwrite->row_ix < row_ix)
+    zsv_next_overwrite(overwrite);
+  while(overwrite->have && overwrite->row_ix == row_ix && overwrite->col_ix < col_ix)
+    zsv_next_overwrite(overwrite);
+  if(!overwrite->have)
+    parser->get_cell = zsv_get_cell_1;
+  return overwrite->have && overwrite->row_ix == row_ix && overwrite->col_ix == col_ix;
+}
+
+static struct zsv_cell zsv_get_cell_with_overwrite(zsv_parser parser, size_t col_ix) {
+  if(VERY_LIKELY(col_ix < parser->row.used)) {
+    size_t row_ix = parser->data_row_count;
+    if(!zsv_have_overwrite(parser, row_ix, col_ix))
+      return parser->row.cells[col_ix];
+
+    struct zsv_cell c = parser->overwrite.val;
+    c.overwritten = 1;
+    return c;
+  }
+  struct zsv_cell c = { 0, 0, 0, 0 };
+  return c;
+}
+#endif
+
 static int zsv_scanner_init(struct zsv_scanner *scanner,
                               struct zsv_opts *opts) {
   size_t need_buff_size = 0;
@@ -611,7 +761,6 @@ static int zsv_scanner_init(struct zsv_scanner *scanner,
     opts->max_row_size = opts->buffsize / 2;
     fprintf(stderr, "Warning: max row size set to %u due to buffer size %zu\n", opts->max_row_size, opts->buffsize);
   }
-
   scanner->in = opts->stream;
   if(!opts->read) {
     scanner->read = (zsv_generic_read)fread;
@@ -641,8 +790,11 @@ static int zsv_scanner_init(struct zsv_scanner *scanner,
     set_callbacks(scanner);
     if((scanner->row.allocated = scanner->opts.max_columns)
        && (scanner->row.cells = calloc(scanner->row.allocated, sizeof(*scanner->row.cells))))
-      return 0;
+# ifdef ZSV_EXTRAS
+      // initialize overwrites
+      if(zsv_init_overwrites(scanner, &scanner->opts.overwrite) == zsv_status_ok)
+# endif
+        return 0;
   }
-
   return 1;
 }

+ 4 - 2
csv.mod/zsv/src/zsv_scan_delim.c

@@ -158,7 +158,7 @@ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,
         if(VERY_UNLIKELY(stat))
           return stat;
 #ifdef ZSV_SUPPORT_PULL_PARSER
-        if(VERY_LIKELY(scanner->pull.now)) {
+        if(scanner->pull.now) {
           scanner->pull.now = 0;
           scanner->row.used = scanner->pull.row_used;
           zsv_internal_save_regs(1);
@@ -170,6 +170,7 @@ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,
 #endif
         scanner->cell_start = i + 1;
         scanner->row_start = i + 1;
+        scanner->data_row_count++;
         continue; // this char is not part of the cell content
       } else
         // we are inside an open quote, which is needed to escape this char
@@ -187,7 +188,7 @@ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,
           if(VERY_UNLIKELY(stat))
             return stat;
 #ifdef ZSV_SUPPORT_PULL_PARSER
-          if(VERY_LIKELY(scanner->pull.now)) {
+          if(scanner->pull.now) {
             scanner->pull.now = 0;
             scanner->row.used = scanner->pull.row_used;
             zsv_internal_save_regs(2);
@@ -199,6 +200,7 @@ static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,
 #endif
           scanner->cell_start = i + 1;
           scanner->row_start = i + 1;
+          scanner->data_row_count++;
         }
         continue; // this char is not part of the cell content
       } else

+ 1 - 1
csv.mod/zsv/src/zsv_scan_fixed.c

@@ -10,7 +10,7 @@ static inline char row_fx(struct zsv_scanner *scanner,
     unsigned char *s = buff + cell_start;
     if(UNLIKELY(scanner->opts.cell_handler != NULL))
       scanner->opts.cell_handler(scanner->opts.ctx, s, cell_length);
-    struct zsv_cell c = { s, cell_length, 1 };
+    struct zsv_cell c = { s, cell_length, 1, 0 };
     scanner->row.cells[scanner->row.used++] = c;
 
     cell_start = cell_end;

+ 49 - 0
csv.mod/zsv/src/zsv_strencode.c

@@ -0,0 +1,49 @@
+/*
+ * zsv_strencode(): standalone file to allow zsv utilities that use this
+ * to be used on a standalone basis without the zsv parser
+ *
+ * This file is part of zsv/lib, distributed under the license defined at
+ * https://opensource.org/licenses/MIT
+ */
+
+#include <zsv/utils/utf8.h>
+#include <zsv/utils/compiler.h>
+/**
+ * Ensure valid UTF8 encoding by, if needed, replacing malformed bytes
+ */
+ZSV_EXPORT
+size_t zsv_strencode(unsigned char *s, size_t n, unsigned char replace,
+                     int (*malformed_handler)(void *, const unsigned char *s, size_t n, size_t offset), void *handler_ctx) {
+  size_t new_len = 0;
+  int clen;
+  for(size_t i2 = 0; i2 < n; i2 += (size_t)clen) {
+    clen = ZSV_UTF8_CHARLEN(s[i2]);
+    if(LIKELY(clen == 1))
+      s[new_len++] = s[i2];
+    else if(UNLIKELY(clen < 0) || UNLIKELY(i2 + clen >= n)) {
+      if(malformed_handler)
+        malformed_handler(handler_ctx, s, n, new_len);
+      if(replace)
+        s[new_len++] = replace;
+      clen = 1;
+    } else { /* might be valid multi-byte utf8; check */
+      unsigned char valid_n;
+      for(valid_n = 1; valid_n < clen; valid_n++)
+        if(!ZSV_UTF8_SUBSEQUENT_CHAR_OK(s[i2 + valid_n]))
+          break;
+      if(valid_n == clen) { /* valid_n utf8; copy it */
+        memmove(s + new_len, s + i2, clen);
+        new_len += clen;
+      } else { /* invalid; valid_n smaller than expected */
+        if(malformed_handler)
+          malformed_handler(handler_ctx, s, n, new_len);
+        if(replace) {
+          memset(s + new_len, replace, valid_n);
+          new_len += valid_n;
+        }
+        clen = valid_n;
+      }
+    }
+  }
+  return new_len; // new length
+}