Browse Source

added mikktspace tangent computation support to fmt.hdll

ncannasse 7 years ago
parent
commit
cbeb910c81
6 changed files with 2140 additions and 8 deletions
  1. 2 2
      Makefile
  2. 1890 0
      include/mikktspace/mikktspace.c
  3. 145 0
      include/mikktspace/mikktspace.h
  4. 9 6
      libs/fmt/fmt.vcxproj
  5. 10 0
      libs/fmt/fmt.vcxproj.filters
  6. 84 0
      libs/fmt/mikkt.c

+ 2 - 2
Makefile

@@ -26,7 +26,7 @@ STD = src/std/array.o src/std/buffer.o src/std/bytes.o src/std/cast.o src/std/da
 
 HL = src/code.o src/jit.o src/main.o src/module.o src/debugger.o
 
-FMT = libs/fmt/fmt.o libs/fmt/sha1.o
+FMT = libs/fmt/fmt.o libs/fmt/sha1.o include/mikktspace/mikktspace.c libs/fmt/mikkt.c
 
 SDL = libs/sdl/sdl.o libs/sdl/gl.o
 
@@ -121,7 +121,7 @@ hl: ${HL} libhl
 	${CC} ${CFLAGS} -o hl ${HL} ${LFLAGS} ${HLFLAGS}
 
 fmt: ${FMT} libhl
-	${CC} ${CFLAGS} -shared -o fmt.hdll ${FMT} ${LIBFLAGS} -L. -lhl -lpng $(LIBTURBOJPEG) -lz -lvorbisfile
+	${CC} ${CFLAGS} -shared -o fmt.hdll ${FMT} ${LIBFLAGS} -L. -I include/mikktspace -lhl -lpng $(LIBTURBOJPEG) -lz -lvorbisfile
 
 sdl: ${SDL} libhl
 	${CC} ${CFLAGS} -shared -o sdl.hdll ${SDL} ${LIBFLAGS} -L. -lhl -lSDL2 $(LIBOPENGL)

+ 1890 - 0
include/mikktspace/mikktspace.c

@@ -0,0 +1,1890 @@
+/** \file mikktspace/mikktspace.c
+ *  \ingroup mikktspace
+ */
+/**
+ *  Copyright (C) 2011 by Morten S. Mikkelsen
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty.  In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  1. The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  2. Altered source versions must be plainly marked as such, and must not be
+ *     misrepresented as being the original software.
+ *  3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <float.h>
+#include <stdlib.h>
+
+#include "mikktspace.h"
+
+#define TFALSE		0
+#define TTRUE		1
+
+#ifndef M_PI
+#define M_PI	3.1415926535897932384626433832795
+#endif
+
+#define INTERNAL_RND_SORT_SEED		39871946
+
+// internal structure
+typedef struct {
+	float x, y, z;
+} SVec3;
+
+static tbool			veq( const SVec3 v1, const SVec3 v2 )
+{
+	return (v1.x == v2.x) && (v1.y == v2.y) && (v1.z == v2.z);
+}
+
+static SVec3		vadd( const SVec3 v1, const SVec3 v2 )
+{
+	SVec3 vRes;
+
+	vRes.x = v1.x + v2.x;
+	vRes.y = v1.y + v2.y;
+	vRes.z = v1.z + v2.z;
+
+	return vRes;
+}
+
+
+static SVec3		vsub( const SVec3 v1, const SVec3 v2 )
+{
+	SVec3 vRes;
+
+	vRes.x = v1.x - v2.x;
+	vRes.y = v1.y - v2.y;
+	vRes.z = v1.z - v2.z;
+
+	return vRes;
+}
+
+static SVec3		vscale(const float fS, const SVec3 v)
+{
+	SVec3 vRes;
+
+	vRes.x = fS * v.x;
+	vRes.y = fS * v.y;
+	vRes.z = fS * v.z;
+
+	return vRes;
+}
+
+static float			LengthSquared( const SVec3 v )
+{
+	return v.x*v.x + v.y*v.y + v.z*v.z;
+}
+
+static float			Length( const SVec3 v )
+{
+	return sqrtf(LengthSquared(v));
+}
+
+static SVec3		Normalize( const SVec3 v )
+{
+	return vscale(1 / Length(v), v);
+}
+
+static float		vdot( const SVec3 v1, const SVec3 v2)
+{
+	return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
+}
+
+
+static tbool NotZero(const float fX)
+{
+	// could possibly use FLT_EPSILON instead
+	return fabsf(fX) > FLT_MIN;
+}
+
+static tbool VNotZero(const SVec3 v)
+{
+	// might change this to an epsilon based test
+	return NotZero(v.x) || NotZero(v.y) || NotZero(v.z);
+}
+
+
+
+typedef struct {
+	int iNrFaces;
+	int * pTriMembers;
+} SSubGroup;
+
+typedef struct {
+	int iNrFaces;
+	int * pFaceIndices;
+	int iVertexRepresentitive;
+	tbool bOrientPreservering;
+} SGroup;
+
+// 
+#define MARK_DEGENERATE				1
+#define QUAD_ONE_DEGEN_TRI			2
+#define GROUP_WITH_ANY				4
+#define ORIENT_PRESERVING			8
+
+
+
+typedef struct {
+	int FaceNeighbors[3];
+	SGroup * AssignedGroup[3];
+	
+	// normalized first order face derivatives
+	SVec3 vOs, vOt;
+	float fMagS, fMagT;	// original magnitudes
+
+	// determines if the current and the next triangle are a quad.
+	int iOrgFaceNumber;
+	int iFlag, iTSpacesOffs;
+	unsigned char vert_num[4];
+} STriInfo;
+
+typedef struct {
+	SVec3 vOs;
+	float fMagS;
+	SVec3 vOt;
+	float fMagT;
+	int iCounter;	// this is to average back into quads.
+	tbool bOrient;
+} STSpace;
+
+static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
+static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
+static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
+static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn);
+static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[],
+                             const int iNrActiveGroups, const int piTriListIn[], const float fThresCos,
+                             const SMikkTSpaceContext * pContext);
+
+static int MakeIndex(const int iFace, const int iVert)
+{
+	assert(iVert>=0 && iVert<4 && iFace>=0);
+	return (iFace<<2) | (iVert&0x3);
+}
+
+static void IndexToData(int * piFace, int * piVert, const int iIndexIn)
+{
+	piVert[0] = iIndexIn&0x3;
+	piFace[0] = iIndexIn>>2;
+}
+
+static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1)
+{
+	STSpace ts_res;
+
+	// this if is important. Due to floating point precision
+	// averaging when ts0==ts1 will cause a slight difference
+	// which results in tangent space splits later on
+	if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT &&
+	   veq(pTS0->vOs,pTS1->vOs)	&& veq(pTS0->vOt, pTS1->vOt))
+	{
+		ts_res.fMagS = pTS0->fMagS;
+		ts_res.fMagT = pTS0->fMagT;
+		ts_res.vOs = pTS0->vOs;
+		ts_res.vOt = pTS0->vOt;
+	}
+	else
+	{
+		ts_res.fMagS = 0.5f*(pTS0->fMagS+pTS1->fMagS);
+		ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT);
+		ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs);
+		ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt);
+		if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs);
+		if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt);
+	}
+
+	return ts_res;
+}
+
+
+
+static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index);
+static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index);
+static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index);
+
+
+// degen triangles
+static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris);
+static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris);
+
+
+tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext)
+{
+	return genTangSpace(pContext, 180.0f);
+}
+
+tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold)
+{
+	// count nr_triangles
+	int * piTriListIn = NULL, * piGroupTrianglesBuffer = NULL;
+	STriInfo * pTriInfos = NULL;
+	SGroup * pGroups = NULL;
+	STSpace * psTspace = NULL;
+	int iNrTrianglesIn = 0, f=0, t=0, i=0;
+	int iNrTSPaces = 0, iTotTris = 0, iDegenTriangles = 0, iNrMaxGroups = 0;
+	int iNrActiveGroups = 0, index = 0;
+	const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext);
+	tbool bRes = TFALSE;
+	const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f);
+
+	// verify all call-backs have been set
+	if ( pContext->m_pInterface->m_getNumFaces==NULL ||
+		pContext->m_pInterface->m_getNumVerticesOfFace==NULL ||
+		pContext->m_pInterface->m_getPosition==NULL ||
+		pContext->m_pInterface->m_getNormal==NULL ||
+		pContext->m_pInterface->m_getTexCoord==NULL )
+		return TFALSE;
+
+	// count triangles on supported faces
+	for (f=0; f<iNrFaces; f++)
+	{
+		const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
+		if (verts==3) ++iNrTrianglesIn;
+		else if (verts==4) iNrTrianglesIn += 2;
+	}
+	if (iNrTrianglesIn<=0) return TFALSE;
+
+	// allocate memory for an index list
+	piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn);
+	pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn);
+	if (piTriListIn==NULL || pTriInfos==NULL)
+	{
+		if (piTriListIn!=NULL) free(piTriListIn);
+		if (pTriInfos!=NULL) free(pTriInfos);
+		return TFALSE;
+	}
+
+	// make an initial triangle --> face index list
+	iNrTSPaces = GenerateInitialVerticesIndexList(pTriInfos, piTriListIn, pContext, iNrTrianglesIn);
+
+	// make a welded index list of identical positions and attributes (pos, norm, texc)
+	//printf("gen welded index list begin\n");
+	GenerateSharedVerticesIndexList(piTriListIn, pContext, iNrTrianglesIn);
+	//printf("gen welded index list end\n");
+
+	// Mark all degenerate triangles
+	iTotTris = iNrTrianglesIn;
+	iDegenTriangles = 0;
+	for (t=0; t<iTotTris; t++)
+	{
+		const int i0 = piTriListIn[t*3+0];
+		const int i1 = piTriListIn[t*3+1];
+		const int i2 = piTriListIn[t*3+2];
+		const SVec3 p0 = GetPosition(pContext, i0);
+		const SVec3 p1 = GetPosition(pContext, i1);
+		const SVec3 p2 = GetPosition(pContext, i2);
+		if (veq(p0,p1) || veq(p0,p2) || veq(p1,p2))	// degenerate
+		{
+			pTriInfos[t].iFlag |= MARK_DEGENERATE;
+			++iDegenTriangles;
+		}
+	}
+	iNrTrianglesIn = iTotTris - iDegenTriangles;
+
+	// mark all triangle pairs that belong to a quad with only one
+	// good triangle. These need special treatment in DegenEpilogue().
+	// Additionally, move all good triangles to the start of
+	// pTriInfos[] and piTriListIn[] without changing order and
+	// put the degenerate triangles last.
+	DegenPrologue(pTriInfos, piTriListIn, iNrTrianglesIn, iTotTris);
+
+	
+	// evaluate triangle level attributes and neighbor list
+	//printf("gen neighbors list begin\n");
+	InitTriInfo(pTriInfos, piTriListIn, pContext, iNrTrianglesIn);
+	//printf("gen neighbors list end\n");
+
+	
+	// based on the 4 rules, identify groups based on connectivity
+	iNrMaxGroups = iNrTrianglesIn*3;
+	pGroups = (SGroup *) malloc(sizeof(SGroup)*iNrMaxGroups);
+	piGroupTrianglesBuffer = (int *) malloc(sizeof(int)*iNrTrianglesIn*3);
+	if (pGroups==NULL || piGroupTrianglesBuffer==NULL)
+	{
+		if (pGroups!=NULL) free(pGroups);
+		if (piGroupTrianglesBuffer!=NULL) free(piGroupTrianglesBuffer);
+		free(piTriListIn);
+		free(pTriInfos);
+		return TFALSE;
+	}
+	//printf("gen 4rule groups begin\n");
+	iNrActiveGroups =
+		Build4RuleGroups(pTriInfos, pGroups, piGroupTrianglesBuffer, piTriListIn, iNrTrianglesIn);
+	//printf("gen 4rule groups end\n");
+
+	//
+
+	psTspace = (STSpace *) malloc(sizeof(STSpace)*iNrTSPaces);
+	if (psTspace==NULL)
+	{
+		free(piTriListIn);
+		free(pTriInfos);
+		free(pGroups);
+		free(piGroupTrianglesBuffer);
+		return TFALSE;
+	}
+	memset(psTspace, 0, sizeof(STSpace)*iNrTSPaces);
+	for (t=0; t<iNrTSPaces; t++)
+	{
+		psTspace[t].vOs.x=1.0f; psTspace[t].vOs.y=0.0f; psTspace[t].vOs.z=0.0f; psTspace[t].fMagS = 1.0f;
+		psTspace[t].vOt.x=0.0f; psTspace[t].vOt.y=1.0f; psTspace[t].vOt.z=0.0f; psTspace[t].fMagT = 1.0f;
+	}
+
+	// make tspaces, each group is split up into subgroups if necessary
+	// based on fAngularThreshold. Finally a tangent space is made for
+	// every resulting subgroup
+	//printf("gen tspaces begin\n");
+	bRes = GenerateTSpaces(psTspace, pTriInfos, pGroups, iNrActiveGroups, piTriListIn, fThresCos, pContext);
+	//printf("gen tspaces end\n");
+	
+	// clean up
+	free(pGroups);
+	free(piGroupTrianglesBuffer);
+
+	if (!bRes)	// if an allocation in GenerateTSpaces() failed
+	{
+		// clean up and return false
+		free(pTriInfos); free(piTriListIn); free(psTspace);
+		return TFALSE;
+	}
+
+
+	// degenerate quads with one good triangle will be fixed by copying a space from
+	// the good triangle to the coinciding vertex.
+	// all other degenerate triangles will just copy a space from any good triangle
+	// with the same welded index in piTriListIn[].
+	DegenEpilogue(psTspace, pTriInfos, piTriListIn, pContext, iNrTrianglesIn, iTotTris);
+
+	free(pTriInfos); free(piTriListIn);
+
+	index = 0;
+	for (f=0; f<iNrFaces; f++)
+	{
+		const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
+		if (verts!=3 && verts!=4) continue;
+		
+
+		// I've decided to let degenerate triangles and group-with-anythings
+		// vary between left/right hand coordinate systems at the vertices.
+		// All healthy triangles on the other hand are built to always be either or.
+
+		/*// force the coordinate system orientation to be uniform for every face.
+		// (this is already the case for good triangles but not for
+		// degenerate ones and those with bGroupWithAnything==true)
+		bool bOrient = psTspace[index].bOrient;
+		if (psTspace[index].iCounter == 0)	// tspace was not derived from a group
+		{
+			// look for a space created in GenerateTSpaces() by iCounter>0
+			bool bNotFound = true;
+			int i=1;
+			while (i<verts && bNotFound)
+			{
+				if (psTspace[index+i].iCounter > 0) bNotFound=false;
+				else ++i;
+			}
+			if (!bNotFound) bOrient = psTspace[index+i].bOrient;
+		}*/
+
+		// set data
+		for (i=0; i<verts; i++)
+		{
+			const STSpace * pTSpace = &psTspace[index];
+			float tang[] = {pTSpace->vOs.x, pTSpace->vOs.y, pTSpace->vOs.z};
+			float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z};
+			if (pContext->m_pInterface->m_setTSpace!=NULL)
+				pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i);
+			if (pContext->m_pInterface->m_setTSpaceBasic!=NULL)
+				pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i);
+
+			++index;
+		}
+	}
+
+	free(psTspace);
+
+	
+	return TTRUE;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+typedef struct {
+	float vert[3];
+	int index;
+} STmpVert;
+
+static const int g_iCells = 2048;
+
+#ifdef _MSC_VER
+	#define NOINLINE __declspec(noinline)
+#else
+	#define NOINLINE __attribute__ ((noinline))
+#endif
+
+// it is IMPORTANT that this function is called to evaluate the hash since
+// inlining could potentially reorder instructions and generate different
+// results for the same effective input value fVal.
+static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal)
+{
+	const float fIndex = g_iCells * ((fVal-fMin)/(fMax-fMin));
+	const int iIndex = (int)fIndex;
+	return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1);
+}
+
+static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in);
+static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries);
+static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
+
+static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
+{
+
+	// Generate bounding box
+	int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL;
+	STmpVert * pTmpVert = NULL;
+	int i=0, iChannel=0, k=0, e=0;
+	int iMaxCount=0;
+	SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim;
+	float fMin, fMax;
+	for (i=1; i<(iNrTrianglesIn*3); i++)
+	{
+		const int index = piTriList_in_and_out[i];
+
+		const SVec3 vP = GetPosition(pContext, index);
+		if (vMin.x > vP.x) vMin.x = vP.x;
+		else if (vMax.x < vP.x) vMax.x = vP.x;
+		if (vMin.y > vP.y) vMin.y = vP.y;
+		else if (vMax.y < vP.y) vMax.y = vP.y;
+		if (vMin.z > vP.z) vMin.z = vP.z;
+		else if (vMax.z < vP.z) vMax.z = vP.z;
+	}
+
+	vDim = vsub(vMax,vMin);
+	iChannel = 0;
+	fMin = vMin.x; fMax=vMax.x;
+	if (vDim.y>vDim.x && vDim.y>vDim.z)
+	{
+		iChannel=1;
+		fMin = vMin.y, fMax=vMax.y;
+	}
+	else if (vDim.z>vDim.x)
+	{
+		iChannel=2;
+		fMin = vMin.z, fMax=vMax.z;
+	}
+
+	// make allocations
+	piHashTable = (int *) malloc(sizeof(int)*iNrTrianglesIn*3);
+	piHashCount = (int *) malloc(sizeof(int)*g_iCells);
+	piHashOffsets = (int *) malloc(sizeof(int)*g_iCells);
+	piHashCount2 = (int *) malloc(sizeof(int)*g_iCells);
+
+	if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL)
+	{
+		if (piHashTable!=NULL) free(piHashTable);
+		if (piHashCount!=NULL) free(piHashCount);
+		if (piHashOffsets!=NULL) free(piHashOffsets);
+		if (piHashCount2!=NULL) free(piHashCount2);
+		GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn);
+		return;
+	}
+	memset(piHashCount, 0, sizeof(int)*g_iCells);
+	memset(piHashCount2, 0, sizeof(int)*g_iCells);
+
+	// count amount of elements in each cell unit
+	for (i=0; i<(iNrTrianglesIn*3); i++)
+	{
+		const int index = piTriList_in_and_out[i];
+		const SVec3 vP = GetPosition(pContext, index);
+		const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
+		const int iCell = FindGridCell(fMin, fMax, fVal);
+		++piHashCount[iCell];
+	}
+
+	// evaluate start index of each cell.
+	piHashOffsets[0]=0;
+	for (k=1; k<g_iCells; k++)
+		piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1];
+
+	// insert vertices
+	for (i=0; i<(iNrTrianglesIn*3); i++)
+	{
+		const int index = piTriList_in_and_out[i];
+		const SVec3 vP = GetPosition(pContext, index);
+		const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
+		const int iCell = FindGridCell(fMin, fMax, fVal);
+		int * pTable = NULL;
+
+		assert(piHashCount2[iCell]<piHashCount[iCell]);
+		pTable = &piHashTable[piHashOffsets[iCell]];
+		pTable[piHashCount2[iCell]] = i;	// vertex i has been inserted.
+		++piHashCount2[iCell];
+	}
+	for (k=0; k<g_iCells; k++)
+		assert(piHashCount2[k] == piHashCount[k]);	// verify the count
+	free(piHashCount2);
+
+	// find maximum amount of entries in any hash entry
+	iMaxCount = piHashCount[0];
+	for (k=1; k<g_iCells; k++)
+		if (iMaxCount<piHashCount[k])
+			iMaxCount=piHashCount[k];
+	pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount);
+	
+
+	// complete the merge
+	for (k=0; k<g_iCells; k++)
+	{
+		// extract table of cell k and amount of entries in it
+		int * pTable = &piHashTable[piHashOffsets[k]];
+		const int iEntries = piHashCount[k];
+		if (iEntries < 2) continue;
+
+		if (pTmpVert!=NULL)
+		{
+			for (e=0; e<iEntries; e++)
+			{
+				int i = pTable[e];
+				const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]);
+				pTmpVert[e].vert[0] = vP.x; pTmpVert[e].vert[1] = vP.y;
+				pTmpVert[e].vert[2] = vP.z; pTmpVert[e].index = i;
+			}
+			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, 0, iEntries-1);
+		}
+		else
+			MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries);
+	}
+
+	if (pTmpVert!=NULL) { free(pTmpVert); }
+	free(piHashTable);
+	free(piHashCount);
+	free(piHashOffsets);
+}
+
+static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in)
+{
+	// make bbox
+	int c=0, l=0, channel=0;
+	float fvMin[3], fvMax[3];
+	float dx=0, dy=0, dz=0, fSep=0;
+	for (c=0; c<3; c++)
+	{	fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c];	}
+	for (l=(iL_in+1); l<=iR_in; l++)
+		for (c=0; c<3; c++)
+			if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
+			else if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
+
+	dx = fvMax[0]-fvMin[0];
+	dy = fvMax[1]-fvMin[1];
+	dz = fvMax[2]-fvMin[2];
+
+	channel = 0;
+	if (dy>dx && dy>dz) channel=1;
+	else if (dz>dx) channel=2;
+
+	fSep = 0.5f*(fvMax[channel]+fvMin[channel]);
+
+	// terminate recursion when the separation/average value
+	// is no longer strictly between fMin and fMax values.
+	if (fSep>=fvMax[channel] || fSep<=fvMin[channel])
+	{
+		// complete the weld
+		for (l=iL_in; l<=iR_in; l++)
+		{
+			int i = pTmpVert[l].index;
+			const int index = piTriList_in_and_out[i];
+			const SVec3 vP = GetPosition(pContext, index);
+			const SVec3 vN = GetNormal(pContext, index);
+			const SVec3 vT = GetTexCoord(pContext, index);
+
+			tbool bNotFound = TTRUE;
+			int l2=iL_in, i2rec=-1;
+			while (l2<l && bNotFound)
+			{
+				const int i2 = pTmpVert[l2].index;
+				const int index2 = piTriList_in_and_out[i2];
+				const SVec3 vP2 = GetPosition(pContext, index2);
+				const SVec3 vN2 = GetNormal(pContext, index2);
+				const SVec3 vT2 = GetTexCoord(pContext, index2);
+				i2rec=i2;
+
+				//if (vP==vP2 && vN==vN2 && vT==vT2)
+				if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z &&
+					vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z &&
+					vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z)
+					bNotFound = TFALSE;
+				else
+					++l2;
+			}
+			
+			// merge if previously found
+			if (!bNotFound)
+				piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
+		}
+	}
+	else
+	{
+		int iL=iL_in, iR=iR_in;
+		assert((iR_in-iL_in)>0);	// at least 2 entries
+
+		// separate (by fSep) all points between iL_in and iR_in in pTmpVert[]
+		while (iL < iR)
+		{
+			tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE;
+			while ((!bReadyLeftSwap) && iL<iR)
+			{
+				assert(iL>=iL_in && iL<=iR_in);
+				bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep);
+				if (!bReadyLeftSwap) ++iL;
+			}
+			while ((!bReadyRightSwap) && iL<iR)
+			{
+				assert(iR>=iL_in && iR<=iR_in);
+				bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
+				if (!bReadyRightSwap) --iR;
+			}
+			assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) );
+
+			if (bReadyLeftSwap && bReadyRightSwap)
+			{
+				const STmpVert sTmp = pTmpVert[iL];
+				assert(iL<iR);
+				pTmpVert[iL] = pTmpVert[iR];
+				pTmpVert[iR] = sTmp;
+				++iL; --iR;
+			}
+		}
+
+		assert(iL==(iR+1) || (iL==iR));
+		if (iL==iR)
+		{
+			const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
+			if (bReadyRightSwap) ++iL;
+			else --iR;
+		}
+
+		// only need to weld when there is more than 1 instance of the (x,y,z)
+		if (iL_in < iR)
+			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR);	// weld all left of fSep
+		if (iL < iR_in)
+			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in);	// weld all right of (or equal to) fSep
+	}
+}
+
+static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries)
+{
+	// this can be optimized further using a tree structure or more hashing.
+	int e=0;
+	for (e=0; e<iEntries; e++)
+	{
+		int i = pTable[e];
+		const int index = piTriList_in_and_out[i];
+		const SVec3 vP = GetPosition(pContext, index);
+		const SVec3 vN = GetNormal(pContext, index);
+		const SVec3 vT = GetTexCoord(pContext, index);
+
+		tbool bNotFound = TTRUE;
+		int e2=0, i2rec=-1;
+		while (e2<e && bNotFound)
+		{
+			const int i2 = pTable[e2];
+			const int index2 = piTriList_in_and_out[i2];
+			const SVec3 vP2 = GetPosition(pContext, index2);
+			const SVec3 vN2 = GetNormal(pContext, index2);
+			const SVec3 vT2 = GetTexCoord(pContext, index2);
+			i2rec = i2;
+
+			if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
+				bNotFound = TFALSE;
+			else
+				++e2;
+		}
+		
+		// merge if previously found
+		if (!bNotFound)
+			piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
+	}
+}
+
+static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
+{
+	int iNumUniqueVerts = 0, t=0, i=0;
+	for (t=0; t<iNrTrianglesIn; t++)
+	{
+		for (i=0; i<3; i++)
+		{
+			const int offs = t*3 + i;
+			const int index = piTriList_in_and_out[offs];
+
+			const SVec3 vP = GetPosition(pContext, index);
+			const SVec3 vN = GetNormal(pContext, index);
+			const SVec3 vT = GetTexCoord(pContext, index);
+
+			tbool bFound = TFALSE;
+			int t2=0, index2rec=-1;
+			while (!bFound && t2<=t)
+			{
+				int j=0;
+				while (!bFound && j<3)
+				{
+					const int index2 = piTriList_in_and_out[t2*3 + j];
+					const SVec3 vP2 = GetPosition(pContext, index2);
+					const SVec3 vN2 = GetNormal(pContext, index2);
+					const SVec3 vT2 = GetTexCoord(pContext, index2);
+					
+					if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
+						bFound = TTRUE;
+					else
+						++j;
+				}
+				if (!bFound) ++t2;
+			}
+
+			assert(bFound);
+			// if we found our own
+			if (index2rec == index) { ++iNumUniqueVerts; }
+
+			piTriList_in_and_out[offs] = index2rec;
+		}
+	}
+}
+
+static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
+{
+	int iTSpacesOffs = 0, f=0, t=0;
+	int iDstTriIndex = 0;
+	for (f=0; f<pContext->m_pInterface->m_getNumFaces(pContext); f++)
+	{
+		const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
+		if (verts!=3 && verts!=4) continue;
+
+		pTriInfos[iDstTriIndex].iOrgFaceNumber = f;
+		pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs;
+
+		if (verts==3)
+		{
+			unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num;
+			pVerts[0]=0; pVerts[1]=1; pVerts[2]=2;
+			piTriList_out[iDstTriIndex*3+0] = MakeIndex(f, 0);
+			piTriList_out[iDstTriIndex*3+1] = MakeIndex(f, 1);
+			piTriList_out[iDstTriIndex*3+2] = MakeIndex(f, 2);
+			++iDstTriIndex;	// next
+		}
+		else
+		{
+			{
+				pTriInfos[iDstTriIndex+1].iOrgFaceNumber = f;
+				pTriInfos[iDstTriIndex+1].iTSpacesOffs = iTSpacesOffs;
+			}
+
+			{
+				// need an order independent way to evaluate
+				// tspace on quads. This is done by splitting
+				// along the shortest diagonal.
+				const int i0 = MakeIndex(f, 0);
+				const int i1 = MakeIndex(f, 1);
+				const int i2 = MakeIndex(f, 2);
+				const int i3 = MakeIndex(f, 3);
+				const SVec3 T0 = GetTexCoord(pContext, i0);
+				const SVec3 T1 = GetTexCoord(pContext, i1);
+				const SVec3 T2 = GetTexCoord(pContext, i2);
+				const SVec3 T3 = GetTexCoord(pContext, i3);
+				const float distSQ_02 = LengthSquared(vsub(T2,T0));
+				const float distSQ_13 = LengthSquared(vsub(T3,T1));
+				tbool bQuadDiagIs_02;
+				if (distSQ_02<distSQ_13)
+					bQuadDiagIs_02 = TTRUE;
+				else if (distSQ_13<distSQ_02)
+					bQuadDiagIs_02 = TFALSE;
+				else
+				{
+					const SVec3 P0 = GetPosition(pContext, i0);
+					const SVec3 P1 = GetPosition(pContext, i1);
+					const SVec3 P2 = GetPosition(pContext, i2);
+					const SVec3 P3 = GetPosition(pContext, i3);
+					const float distSQ_02 = LengthSquared(vsub(P2,P0));
+					const float distSQ_13 = LengthSquared(vsub(P3,P1));
+
+					bQuadDiagIs_02 = distSQ_13<distSQ_02 ? TFALSE : TTRUE;
+				}
+
+				if (bQuadDiagIs_02)
+				{
+					{
+						unsigned char * pVerts_A = pTriInfos[iDstTriIndex].vert_num;
+						pVerts_A[0]=0; pVerts_A[1]=1; pVerts_A[2]=2;
+					}
+					piTriList_out[iDstTriIndex*3+0] = i0;
+					piTriList_out[iDstTriIndex*3+1] = i1;
+					piTriList_out[iDstTriIndex*3+2] = i2;
+					++iDstTriIndex;	// next
+					{
+						unsigned char * pVerts_B = pTriInfos[iDstTriIndex].vert_num;
+						pVerts_B[0]=0; pVerts_B[1]=2; pVerts_B[2]=3;
+					}
+					piTriList_out[iDstTriIndex*3+0] = i0;
+					piTriList_out[iDstTriIndex*3+1] = i2;
+					piTriList_out[iDstTriIndex*3+2] = i3;
+					++iDstTriIndex;	// next
+				}
+				else
+				{
+					{
+						unsigned char * pVerts_A = pTriInfos[iDstTriIndex].vert_num;
+						pVerts_A[0]=0; pVerts_A[1]=1; pVerts_A[2]=3;
+					}
+					piTriList_out[iDstTriIndex*3+0] = i0;
+					piTriList_out[iDstTriIndex*3+1] = i1;
+					piTriList_out[iDstTriIndex*3+2] = i3;
+					++iDstTriIndex;	// next
+					{
+						unsigned char * pVerts_B = pTriInfos[iDstTriIndex].vert_num;
+						pVerts_B[0]=1; pVerts_B[1]=2; pVerts_B[2]=3;
+					}
+					piTriList_out[iDstTriIndex*3+0] = i1;
+					piTriList_out[iDstTriIndex*3+1] = i2;
+					piTriList_out[iDstTriIndex*3+2] = i3;
+					++iDstTriIndex;	// next
+				}
+			}
+		}
+
+		iTSpacesOffs += verts;
+		assert(iDstTriIndex<=iNrTrianglesIn);
+	}
+
+	for (t=0; t<iNrTrianglesIn; t++)
+		pTriInfos[t].iFlag = 0;
+
+	// return total amount of tspaces
+	return iTSpacesOffs;
+}
+
+static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index)
+{
+	int iF, iI;
+	SVec3 res; float pos[3];
+	IndexToData(&iF, &iI, index);
+	pContext->m_pInterface->m_getPosition(pContext, pos, iF, iI);
+	res.x=pos[0]; res.y=pos[1]; res.z=pos[2];
+	return res;
+}
+
+static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index)
+{
+	int iF, iI;
+	SVec3 res; float norm[3];
+	IndexToData(&iF, &iI, index);
+	pContext->m_pInterface->m_getNormal(pContext, norm, iF, iI);
+	res.x=norm[0]; res.y=norm[1]; res.z=norm[2];
+	return res;
+}
+
+static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index)
+{
+	int iF, iI;
+	SVec3 res; float texc[2];
+	IndexToData(&iF, &iI, index);
+	pContext->m_pInterface->m_getTexCoord(pContext, texc, iF, iI);
+	res.x=texc[0]; res.y=texc[1]; res.z=1.0f;
+	return res;
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+
+typedef union {
+	struct
+	{
+		int i0, i1, f;
+	};
+	int array[3];
+} SEdge;
+
+static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn);
+static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn);
+
+// returns the texture area times 2
+static float CalcTexArea(const SMikkTSpaceContext * pContext, const int indices[])
+{
+	const SVec3 t1 = GetTexCoord(pContext, indices[0]);
+	const SVec3 t2 = GetTexCoord(pContext, indices[1]);
+	const SVec3 t3 = GetTexCoord(pContext, indices[2]);
+
+	const float t21x = t2.x-t1.x;
+	const float t21y = t2.y-t1.y;
+	const float t31x = t3.x-t1.x;
+	const float t31y = t3.y-t1.y;
+
+	const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x;
+
+	return fSignedAreaSTx2<0 ? (-fSignedAreaSTx2) : fSignedAreaSTx2;
+}
+
+static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
+{
+	int f=0, i=0, t=0;
+	// pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function.
+
+	// generate neighbor info list
+	for (f=0; f<iNrTrianglesIn; f++)
+		for (i=0; i<3; i++)
+		{
+			pTriInfos[f].FaceNeighbors[i] = -1;
+			pTriInfos[f].AssignedGroup[i] = NULL;
+
+			pTriInfos[f].vOs.x=0.0f; pTriInfos[f].vOs.y=0.0f; pTriInfos[f].vOs.z=0.0f;
+			pTriInfos[f].vOt.x=0.0f; pTriInfos[f].vOt.y=0.0f; pTriInfos[f].vOt.z=0.0f;
+			pTriInfos[f].fMagS = 0;
+			pTriInfos[f].fMagT = 0;
+
+			// assumed bad
+			pTriInfos[f].iFlag |= GROUP_WITH_ANY;
+		}
+
+	// evaluate first order derivatives
+	for (f=0; f<iNrTrianglesIn; f++)
+	{
+		// initial values
+		const SVec3 v1 = GetPosition(pContext, piTriListIn[f*3+0]);
+		const SVec3 v2 = GetPosition(pContext, piTriListIn[f*3+1]);
+		const SVec3 v3 = GetPosition(pContext, piTriListIn[f*3+2]);
+		const SVec3 t1 = GetTexCoord(pContext, piTriListIn[f*3+0]);
+		const SVec3 t2 = GetTexCoord(pContext, piTriListIn[f*3+1]);
+		const SVec3 t3 = GetTexCoord(pContext, piTriListIn[f*3+2]);
+
+		const float t21x = t2.x-t1.x;
+		const float t21y = t2.y-t1.y;
+		const float t31x = t3.x-t1.x;
+		const float t31y = t3.y-t1.y;
+		const SVec3 d1 = vsub(v2,v1);
+		const SVec3 d2 = vsub(v3,v1);
+
+		const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x;
+		//assert(fSignedAreaSTx2!=0);
+		SVec3 vOs = vsub(vscale(t31y,d1), vscale(t21y,d2));	// eq 18
+		SVec3 vOt = vadd(vscale(-t31x,d1), vscale(t21x,d2)); // eq 19
+
+		pTriInfos[f].iFlag |= (fSignedAreaSTx2>0 ? ORIENT_PRESERVING : 0);
+
+		if ( NotZero(fSignedAreaSTx2) )
+		{
+			const float fAbsArea = fabsf(fSignedAreaSTx2);
+			const float fLenOs = Length(vOs);
+			const float fLenOt = Length(vOt);
+			const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f;
+			if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs);
+			if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt);
+
+			// evaluate magnitudes prior to normalization of vOs and vOt
+			pTriInfos[f].fMagS = fLenOs / fAbsArea;
+			pTriInfos[f].fMagT = fLenOt / fAbsArea;
+
+			// if this is a good triangle
+			if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT))
+				pTriInfos[f].iFlag &= (~GROUP_WITH_ANY);
+		}
+	}
+
+	// force otherwise healthy quads to a fixed orientation
+	while (t<(iNrTrianglesIn-1))
+	{
+		const int iFO_a = pTriInfos[t].iOrgFaceNumber;
+		const int iFO_b = pTriInfos[t+1].iOrgFaceNumber;
+		if (iFO_a==iFO_b)	// this is a quad
+		{
+			const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
+			const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
+			
+			// bad triangles should already have been removed by
+			// DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false
+			if ((bIsDeg_a||bIsDeg_b)==TFALSE)
+			{
+				const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
+				const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
+				// if this happens the quad has extremely bad mapping!!
+				if (bOrientA!=bOrientB)
+				{
+					//printf("found quad with bad mapping\n");
+					tbool bChooseOrientFirstTri = TFALSE;
+					if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE;
+					else if ( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) )
+						bChooseOrientFirstTri = TTRUE;
+
+					// force match
+					{
+						const int t0 = bChooseOrientFirstTri ? t : (t+1);
+						const int t1 = bChooseOrientFirstTri ? (t+1) : t;
+						pTriInfos[t1].iFlag &= (~ORIENT_PRESERVING);	// clear first
+						pTriInfos[t1].iFlag |= (pTriInfos[t0].iFlag&ORIENT_PRESERVING);	// copy bit
+					}
+				}
+			}
+			t += 2;
+		}
+		else
+			++t;
+	}
+	
+	// match up edge pairs
+	{
+		SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3);
+		if (pEdges==NULL)
+			BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn);
+		else
+		{
+			BuildNeighborsFast(pTriInfos, pEdges, piTriListIn, iNrTrianglesIn);
+	
+			free(pEdges);
+		}
+	}
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+
+static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iMyTriIndex, SGroup * pGroup);
+static void AddTriToGroup(SGroup * pGroup, const int iTriIndex);
+
+static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn)
+{
+	const int iNrMaxGroups = iNrTrianglesIn*3;
+	int iNrActiveGroups = 0;
+	int iOffset = 0, f=0, i=0;
+	(void)iNrMaxGroups;  /* quiet warnings in non debug mode */
+	for (f=0; f<iNrTrianglesIn; f++)
+	{
+		for (i=0; i<3; i++)
+		{
+			// if not assigned to a group
+			if ((pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 && pTriInfos[f].AssignedGroup[i]==NULL)
+			{
+				tbool bOrPre;
+				int neigh_indexL, neigh_indexR;
+				const int vert_index = piTriListIn[f*3+i];
+				assert(iNrActiveGroups<iNrMaxGroups);
+				pTriInfos[f].AssignedGroup[i] = &pGroups[iNrActiveGroups];
+				pTriInfos[f].AssignedGroup[i]->iVertexRepresentitive = vert_index;
+				pTriInfos[f].AssignedGroup[i]->bOrientPreservering = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0;
+				pTriInfos[f].AssignedGroup[i]->iNrFaces = 0;
+				pTriInfos[f].AssignedGroup[i]->pFaceIndices = &piGroupTrianglesBuffer[iOffset];
+				++iNrActiveGroups;
+
+				AddTriToGroup(pTriInfos[f].AssignedGroup[i], f);
+				bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
+				neigh_indexL = pTriInfos[f].FaceNeighbors[i];
+				neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2];
+				if (neigh_indexL>=0) // neighbor
+				{
+					const tbool bAnswer =
+						AssignRecur(piTriListIn, pTriInfos, neigh_indexL,
+									pTriInfos[f].AssignedGroup[i] );
+					
+					const tbool bOrPre2 = (pTriInfos[neigh_indexL].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
+					const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE;
+					assert(bAnswer || bDiff);
+					(void)bAnswer, (void)bDiff;  /* quiet warnings in non debug mode */
+				}
+				if (neigh_indexR>=0) // neighbor
+				{
+					const tbool bAnswer =
+						AssignRecur(piTriListIn, pTriInfos, neigh_indexR,
+									pTriInfos[f].AssignedGroup[i] );
+
+					const tbool bOrPre2 = (pTriInfos[neigh_indexR].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
+					const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE;
+					assert(bAnswer || bDiff);
+					(void)bAnswer, (void)bDiff;  /* quiet warnings in non debug mode */
+				}
+
+				// update offset
+				iOffset += pTriInfos[f].AssignedGroup[i]->iNrFaces;
+				// since the groups are disjoint a triangle can never
+				// belong to more than 3 groups. Subsequently something
+				// is completely screwed if this assertion ever hits.
+				assert(iOffset <= iNrMaxGroups);
+			}
+		}
+	}
+
+	return iNrActiveGroups;
+}
+
+static void AddTriToGroup(SGroup * pGroup, const int iTriIndex)
+{
+	pGroup->pFaceIndices[pGroup->iNrFaces] = iTriIndex;
+	++pGroup->iNrFaces;
+}
+
+static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[],
+				 const int iMyTriIndex, SGroup * pGroup)
+{
+	STriInfo * pMyTriInfo = &psTriInfos[iMyTriIndex];
+
+	// track down vertex
+	const int iVertRep = pGroup->iVertexRepresentitive;
+	const int * pVerts = &piTriListIn[3*iMyTriIndex+0];
+	int i=-1;
+	if (pVerts[0]==iVertRep) i=0;
+	else if (pVerts[1]==iVertRep) i=1;
+	else if (pVerts[2]==iVertRep) i=2;
+	assert(i>=0 && i<3);
+
+	// early out
+	if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE;
+	else if (pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE;
+	if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0)
+	{
+		// first to group with a group-with-anything triangle
+		// determines it's orientation.
+		// This is the only existing order dependency in the code!!
+		if ( pMyTriInfo->AssignedGroup[0] == NULL &&
+			pMyTriInfo->AssignedGroup[1] == NULL &&
+			pMyTriInfo->AssignedGroup[2] == NULL )
+		{
+			pMyTriInfo->iFlag &= (~ORIENT_PRESERVING);
+			pMyTriInfo->iFlag |= (pGroup->bOrientPreservering ? ORIENT_PRESERVING : 0);
+		}
+	}
+	{
+		const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
+		if (bOrient != pGroup->bOrientPreservering) return TFALSE;
+	}
+
+	AddTriToGroup(pGroup, iMyTriIndex);
+	pMyTriInfo->AssignedGroup[i] = pGroup;
+
+	{
+		const int neigh_indexL = pMyTriInfo->FaceNeighbors[i];
+		const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2];
+		if (neigh_indexL>=0)
+			AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup);
+		if (neigh_indexR>=0)
+			AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup);
+	}
+
+
+
+	return TTRUE;
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+
+static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2);
+static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed);
+static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[], const SMikkTSpaceContext * pContext, const int iVertexRepresentitive);
+
+static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[],
+                             const int iNrActiveGroups, const int piTriListIn[], const float fThresCos,
+                             const SMikkTSpaceContext * pContext)
+{
+	STSpace * pSubGroupTspace = NULL;
+	SSubGroup * pUniSubGroups = NULL;
+	int * pTmpMembers = NULL;
+	int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0;
+	for (g=0; g<iNrActiveGroups; g++)
+		if (iMaxNrFaces < pGroups[g].iNrFaces)
+			iMaxNrFaces = pGroups[g].iNrFaces;
+
+	if (iMaxNrFaces == 0) return TTRUE;
+
+	// make initial allocations
+	pSubGroupTspace = (STSpace *) malloc(sizeof(STSpace)*iMaxNrFaces);
+	pUniSubGroups = (SSubGroup *) malloc(sizeof(SSubGroup)*iMaxNrFaces);
+	pTmpMembers = (int *) malloc(sizeof(int)*iMaxNrFaces);
+	if (pSubGroupTspace==NULL || pUniSubGroups==NULL || pTmpMembers==NULL)
+	{
+		if (pSubGroupTspace!=NULL) free(pSubGroupTspace);
+		if (pUniSubGroups!=NULL) free(pUniSubGroups);
+		if (pTmpMembers!=NULL) free(pTmpMembers);
+		return TFALSE;
+	}
+
+
+	iUniqueTspaces = 0;
+	for (g=0; g<iNrActiveGroups; g++)
+	{
+		const SGroup * pGroup = &pGroups[g];
+		int iUniqueSubGroups = 0, s=0;
+
+		for (i=0; i<pGroup->iNrFaces; i++)	// triangles
+		{
+			const int f = pGroup->pFaceIndices[i];	// triangle number
+			int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0;
+			SSubGroup tmp_group;
+			tbool bFound;
+			SVec3 n, vOs, vOt;
+			if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0;
+			else if (pTriInfos[f].AssignedGroup[1]==pGroup) index=1;
+			else if (pTriInfos[f].AssignedGroup[2]==pGroup) index=2;
+			assert(index>=0 && index<3);
+
+			iVertIndex = piTriListIn[f*3+index];
+			assert(iVertIndex==pGroup->iVertexRepresentitive);
+
+			// is normalized already
+			n = GetNormal(pContext, iVertIndex);
+			
+			// project
+			vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n));
+			vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n));
+			if ( VNotZero(vOs) ) vOs = Normalize(vOs);
+			if ( VNotZero(vOt) ) vOt = Normalize(vOt);
+
+			// original face number
+			iOF_1 = pTriInfos[f].iOrgFaceNumber;
+			
+			iMembers = 0;
+			for (j=0; j<pGroup->iNrFaces; j++)
+			{
+				const int t = pGroup->pFaceIndices[j];	// triangle number
+				const int iOF_2 = pTriInfos[t].iOrgFaceNumber;
+
+				// project
+				SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n));
+				SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n));
+				if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2);
+				if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2);
+
+				{
+					const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE;
+					// make sure triangles which belong to the same quad are joined.
+					const tbool bSameOrgFace = iOF_1==iOF_2 ? TTRUE : TFALSE;
+
+					const float fCosS = vdot(vOs,vOs2);
+					const float fCosT = vdot(vOt,vOt2);
+
+					assert(f!=t || bSameOrgFace);	// sanity check
+					if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos))
+						pTmpMembers[iMembers++] = t;
+				}
+			}
+
+			// sort pTmpMembers
+			tmp_group.iNrFaces = iMembers;
+			tmp_group.pTriMembers = pTmpMembers;
+			if (iMembers>1)
+			{
+				unsigned int uSeed = INTERNAL_RND_SORT_SEED;	// could replace with a random seed?
+				QuickSort(pTmpMembers, 0, iMembers-1, uSeed);
+			}
+
+			// look for an existing match
+			bFound = TFALSE;
+			l=0;
+			while (l<iUniqueSubGroups && !bFound)
+			{
+				bFound = CompareSubGroups(&tmp_group, &pUniSubGroups[l]);
+				if (!bFound) ++l;
+			}
+			
+			// assign tangent space index
+			assert(bFound || l==iUniqueSubGroups);
+			//piTempTangIndices[f*3+index] = iUniqueTspaces+l;
+
+			// if no match was found we allocate a new subgroup
+			if (!bFound)
+			{
+				// insert new subgroup
+				int * pIndices = (int *) malloc(sizeof(int)*iMembers);
+				if (pIndices==NULL)
+				{
+					// clean up and return false
+					int s=0;
+					for (s=0; s<iUniqueSubGroups; s++)
+						free(pUniSubGroups[s].pTriMembers);
+					free(pUniSubGroups);
+					free(pTmpMembers);
+					free(pSubGroupTspace);
+					return TFALSE;
+				}
+				pUniSubGroups[iUniqueSubGroups].iNrFaces = iMembers;
+				pUniSubGroups[iUniqueSubGroups].pTriMembers = pIndices;
+				memcpy(pIndices, tmp_group.pTriMembers, iMembers*sizeof(int));
+				pSubGroupTspace[iUniqueSubGroups] =
+					EvalTspace(tmp_group.pTriMembers, iMembers, piTriListIn, pTriInfos, pContext, pGroup->iVertexRepresentitive);
+				++iUniqueSubGroups;
+			}
+
+			// output tspace
+			{
+				const int iOffs = pTriInfos[f].iTSpacesOffs;
+				const int iVert = pTriInfos[f].vert_num[index];
+				STSpace * pTS_out = &psTspace[iOffs+iVert];
+				assert(pTS_out->iCounter<2);
+				assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering);
+				if (pTS_out->iCounter==1)
+				{
+					*pTS_out = AvgTSpace(pTS_out, &pSubGroupTspace[l]);
+					pTS_out->iCounter = 2;	// update counter
+					pTS_out->bOrient = pGroup->bOrientPreservering;
+				}
+				else
+				{
+					assert(pTS_out->iCounter==0);
+					*pTS_out = pSubGroupTspace[l];
+					pTS_out->iCounter = 1;	// update counter
+					pTS_out->bOrient = pGroup->bOrientPreservering;
+				}
+			}
+		}
+
+		// clean up and offset iUniqueTspaces
+		for (s=0; s<iUniqueSubGroups; s++)
+			free(pUniSubGroups[s].pTriMembers);
+		iUniqueTspaces += iUniqueSubGroups;
+	}
+
+	// clean up
+	free(pUniSubGroups);
+	free(pTmpMembers);
+	free(pSubGroupTspace);
+
+	return TTRUE;
+}
+
+static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[],
+                          const SMikkTSpaceContext * pContext, const int iVertexRepresentitive)
+{
+	STSpace res;
+	float fAngleSum = 0;
+	int face=0;
+	res.vOs.x=0.0f; res.vOs.y=0.0f; res.vOs.z=0.0f;
+	res.vOt.x=0.0f; res.vOt.y=0.0f; res.vOt.z=0.0f;
+	res.fMagS = 0; res.fMagT = 0;
+
+	for (face=0; face<iFaces; face++)
+	{
+		const int f = face_indices[face];
+
+		// only valid triangles get to add their contribution
+		if ( (pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 )
+		{
+			SVec3 n, vOs, vOt, p0, p1, p2, v1, v2;
+			float fCos, fAngle, fMagS, fMagT;
+			int i=-1, index=-1, i0=-1, i1=-1, i2=-1;
+			if (piTriListIn[3*f+0]==iVertexRepresentitive) i=0;
+			else if (piTriListIn[3*f+1]==iVertexRepresentitive) i=1;
+			else if (piTriListIn[3*f+2]==iVertexRepresentitive) i=2;
+			assert(i>=0 && i<3);
+
+			// project
+			index = piTriListIn[3*f+i];
+			n = GetNormal(pContext, index);
+			vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n));
+			vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n));
+			if ( VNotZero(vOs) ) vOs = Normalize(vOs);
+			if ( VNotZero(vOt) ) vOt = Normalize(vOt);
+
+			i2 = piTriListIn[3*f + (i<2?(i+1):0)];
+			i1 = piTriListIn[3*f + i];
+			i0 = piTriListIn[3*f + (i>0?(i-1):2)];
+
+			p0 = GetPosition(pContext, i0);
+			p1 = GetPosition(pContext, i1);
+			p2 = GetPosition(pContext, i2);
+			v1 = vsub(p0,p1);
+			v2 = vsub(p2,p1);
+
+			// project
+			v1 = vsub(v1, vscale(vdot(n,v1),n)); if ( VNotZero(v1) ) v1 = Normalize(v1);
+			v2 = vsub(v2, vscale(vdot(n,v2),n)); if ( VNotZero(v2) ) v2 = Normalize(v2);
+
+			// weight contribution by the angle
+			// between the two edge vectors
+			fCos = vdot(v1,v2); fCos=fCos>1?1:(fCos<(-1) ? (-1) : fCos);
+			fAngle = (float) acos(fCos);
+			fMagS = pTriInfos[f].fMagS;
+			fMagT = pTriInfos[f].fMagT;
+
+			res.vOs=vadd(res.vOs, vscale(fAngle,vOs));
+			res.vOt=vadd(res.vOt,vscale(fAngle,vOt));
+			res.fMagS+=(fAngle*fMagS);
+			res.fMagT+=(fAngle*fMagT);
+			fAngleSum += fAngle;
+		}
+	}
+
+	// normalize
+	if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs);
+	if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt);
+	if (fAngleSum>0)
+	{
+		res.fMagS /= fAngleSum;
+		res.fMagT /= fAngleSum;
+	}
+
+	return res;
+}
+
+static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2)
+{
+	tbool bStillSame=TTRUE;
+	int i=0;
+	if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE;
+	while (i<pg1->iNrFaces && bStillSame)
+	{
+		bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE;
+		if (bStillSame) ++i;
+	}
+	return bStillSame;
+}
+
+static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed)
+{
+	int iL, iR, n, index, iMid, iTmp;
+
+	// Random
+	unsigned int t=uSeed&31;
+	t=(uSeed<<t)|(uSeed>>(32-t));
+	uSeed=uSeed+t+3;
+	// Random end
+
+	iL=iLeft; iR=iRight;
+	n = (iR-iL)+1;
+	assert(n>=0);
+	index = (int) (uSeed%n);
+
+	iMid=pSortBuffer[index + iL];
+
+
+	do
+	{
+		while (pSortBuffer[iL] < iMid)
+			++iL;
+		while (pSortBuffer[iR] > iMid)
+			--iR;
+
+		if (iL <= iR)
+		{
+			iTmp = pSortBuffer[iL];
+			pSortBuffer[iL] = pSortBuffer[iR];
+			pSortBuffer[iR] = iTmp;
+			++iL; --iR;
+		}
+	}
+	while (iL <= iR);
+
+	if (iLeft < iR)
+		QuickSort(pSortBuffer, iLeft, iR, uSeed);
+	if (iL < iRight)
+		QuickSort(pSortBuffer, iL, iRight, uSeed);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////
+
+static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed);
+static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in);
+
+static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn)
+{
+	// build array of edges
+	unsigned int uSeed = INTERNAL_RND_SORT_SEED;				// could replace with a random seed?
+	int iEntries=0, iCurStartIndex=-1, f=0, i=0;
+	for (f=0; f<iNrTrianglesIn; f++)
+		for (i=0; i<3; i++)
+		{
+			const int i0 = piTriListIn[f*3+i];
+			const int i1 = piTriListIn[f*3+(i<2?(i+1):0)];
+			pEdges[f*3+i].i0 = i0 < i1 ? i0 : i1;			// put minimum index in i0
+			pEdges[f*3+i].i1 = !(i0 < i1) ? i0 : i1;		// put maximum index in i1
+			pEdges[f*3+i].f = f;							// record face number
+		}
+
+	// sort over all edges by i0, this is the pricy one.
+	QuickSortEdges(pEdges, 0, iNrTrianglesIn*3-1, 0, uSeed);	// sort channel 0 which is i0
+
+	// sub sort over i1, should be fast.
+	// could replace this with a 64 bit int sort over (i0,i1)
+	// with i0 as msb in the quicksort call above.
+	iEntries = iNrTrianglesIn*3;
+	iCurStartIndex = 0;
+	for (i=1; i<iEntries; i++)
+	{
+		if (pEdges[iCurStartIndex].i0 != pEdges[i].i0)
+		{
+			const int iL = iCurStartIndex;
+			const int iR = i-1;
+			//const int iElems = i-iL;
+			iCurStartIndex = i;
+			QuickSortEdges(pEdges, iL, iR, 1, uSeed);	// sort channel 1 which is i1
+		}
+	}
+
+	// sub sort over f, which should be fast.
+	// this step is to remain compliant with BuildNeighborsSlow() when
+	// more than 2 triangles use the same edge (such as a butterfly topology).
+	iCurStartIndex = 0;
+	for (i=1; i<iEntries; i++)
+	{
+		if (pEdges[iCurStartIndex].i0 != pEdges[i].i0 || pEdges[iCurStartIndex].i1 != pEdges[i].i1)
+		{
+			const int iL = iCurStartIndex;
+			const int iR = i-1;
+			//const int iElems = i-iL;
+			iCurStartIndex = i;
+			QuickSortEdges(pEdges, iL, iR, 2, uSeed);	// sort channel 2 which is f
+		}
+	}
+
+	// pair up, adjacent triangles
+	for (i=0; i<iEntries; i++)
+	{
+		const int i0=pEdges[i].i0;
+		const int i1=pEdges[i].i1;
+		const int f = pEdges[i].f;
+		tbool bUnassigned_A;
+
+		int i0_A, i1_A;
+		int edgenum_A, edgenum_B=0;	// 0,1 or 2
+		GetEdge(&i0_A, &i1_A, &edgenum_A, &piTriListIn[f*3], i0, i1);	// resolve index ordering and edge_num
+		bUnassigned_A = pTriInfos[f].FaceNeighbors[edgenum_A] == -1 ? TTRUE : TFALSE;
+
+		if (bUnassigned_A)
+		{
+			// get true index ordering
+			int j=i+1, t;
+			tbool bNotFound = TTRUE;
+			while (j<iEntries && i0==pEdges[j].i0 && i1==pEdges[j].i1 && bNotFound)
+			{
+				tbool bUnassigned_B;
+				int i0_B, i1_B;
+				t = pEdges[j].f;
+				// flip i0_B and i1_B
+				GetEdge(&i1_B, &i0_B, &edgenum_B, &piTriListIn[t*3], pEdges[j].i0, pEdges[j].i1);	// resolve index ordering and edge_num
+				//assert(!(i0_A==i1_B && i1_A==i0_B));
+				bUnassigned_B =  pTriInfos[t].FaceNeighbors[edgenum_B]==-1 ? TTRUE : TFALSE;
+				if (i0_A==i0_B && i1_A==i1_B && bUnassigned_B)
+					bNotFound = TFALSE;
+				else
+					++j;
+			}
+
+			if (!bNotFound)
+			{
+				int t = pEdges[j].f;
+				pTriInfos[f].FaceNeighbors[edgenum_A] = t;
+				//assert(pTriInfos[t].FaceNeighbors[edgenum_B]==-1);
+				pTriInfos[t].FaceNeighbors[edgenum_B] = f;
+			}
+		}
+	}
+}
+
+static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn)
+{
+	int f=0, i=0;
+	for (f=0; f<iNrTrianglesIn; f++)
+	{
+		for (i=0; i<3; i++)
+		{
+			// if unassigned
+			if (pTriInfos[f].FaceNeighbors[i] == -1)
+			{
+				const int i0_A = piTriListIn[f*3+i];
+				const int i1_A = piTriListIn[f*3+(i<2?(i+1):0)];
+
+				// search for a neighbor
+				tbool bFound = TFALSE;
+				int t=0, j=0;
+				while (!bFound && t<iNrTrianglesIn)
+				{
+					if (t!=f)
+					{
+						j=0;
+						while (!bFound && j<3)
+						{
+							// in rev order
+							const int i1_B = piTriListIn[t*3+j];
+							const int i0_B = piTriListIn[t*3+(j<2?(j+1):0)];
+							//assert(!(i0_A==i1_B && i1_A==i0_B));
+							if (i0_A==i0_B && i1_A==i1_B)
+								bFound = TTRUE;
+							else
+								++j;
+						}
+					}
+					
+					if (!bFound) ++t;
+				}
+
+				// assign neighbors
+				if (bFound)
+				{
+					pTriInfos[f].FaceNeighbors[i] = t;
+					//assert(pTriInfos[t].FaceNeighbors[j]==-1);
+					pTriInfos[t].FaceNeighbors[j] = f;
+				}
+			}
+		}
+	}
+}
+
+static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed)
+{
+	unsigned int t;
+	int iL, iR, n, index, iMid;
+
+	// early out
+	SEdge sTmp;
+	const int iElems = iRight-iLeft+1;
+	if (iElems<2) return;
+	else if (iElems==2)
+	{
+		if (pSortBuffer[iLeft].array[channel] > pSortBuffer[iRight].array[channel])
+		{
+			sTmp = pSortBuffer[iLeft];
+			pSortBuffer[iLeft] = pSortBuffer[iRight];
+			pSortBuffer[iRight] = sTmp;
+		}
+		return;
+	}
+
+	// Random
+	t=uSeed&31;
+	t=(uSeed<<t)|(uSeed>>(32-t));
+	uSeed=uSeed+t+3;
+	// Random end
+
+	iL=iLeft, iR=iRight;
+	n = (iR-iL)+1;
+	assert(n>=0);
+	index = (int) (uSeed%n);
+
+	iMid=pSortBuffer[index + iL].array[channel];
+
+	do
+	{
+		while (pSortBuffer[iL].array[channel] < iMid)
+			++iL;
+		while (pSortBuffer[iR].array[channel] > iMid)
+			--iR;
+
+		if (iL <= iR)
+		{
+			sTmp = pSortBuffer[iL];
+			pSortBuffer[iL] = pSortBuffer[iR];
+			pSortBuffer[iR] = sTmp;
+			++iL; --iR;
+		}
+	}
+	while (iL <= iR);
+
+	if (iLeft < iR)
+		QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed);
+	if (iL < iRight)
+		QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed);
+}
+
+// resolve ordering and edge number
+static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in)
+{
+	*edgenum_out = -1;
+	
+	// test if first index is on the edge
+	if (indices[0]==i0_in || indices[0]==i1_in)
+	{
+		// test if second index is on the edge
+		if (indices[1]==i0_in || indices[1]==i1_in)
+		{
+			edgenum_out[0]=0;	// first edge
+			i0_out[0]=indices[0];
+			i1_out[0]=indices[1];
+		}
+		else
+		{
+			edgenum_out[0]=2;	// third edge
+			i0_out[0]=indices[2];
+			i1_out[0]=indices[0];
+		}
+	}
+	else
+	{
+		// only second and third index is on the edge
+		edgenum_out[0]=1;	// second edge
+		i0_out[0]=indices[1];
+		i1_out[0]=indices[2];
+	}
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////// Degenerate triangles ////////////////////////////////////
+
+static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris)
+{
+	int iNextGoodTriangleSearchIndex=-1;
+	tbool bStillFindingGoodOnes;
+
+	// locate quads with only one good triangle
+	int t=0;
+	while (t<(iTotTris-1))
+	{
+		const int iFO_a = pTriInfos[t].iOrgFaceNumber;
+		const int iFO_b = pTriInfos[t+1].iOrgFaceNumber;
+		if (iFO_a==iFO_b)	// this is a quad
+		{
+			const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
+			const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
+			if ((bIsDeg_a^bIsDeg_b)!=0)
+			{
+				pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI;
+				pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI;
+			}
+			t += 2;
+		}
+		else
+			++t;
+	}
+
+	// reorder list so all degen triangles are moved to the back
+	// without reordering the good triangles
+	iNextGoodTriangleSearchIndex = 1;
+	t=0;
+	bStillFindingGoodOnes = TTRUE;
+	while (t<iNrTrianglesIn && bStillFindingGoodOnes)
+	{
+		const tbool bIsGood = (pTriInfos[t].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE;
+		if (bIsGood)
+		{
+			if (iNextGoodTriangleSearchIndex < (t+2))
+				iNextGoodTriangleSearchIndex = t+2;
+		}
+		else
+		{
+			int t0, t1;
+			// search for the first good triangle.
+			tbool bJustADegenerate = TTRUE;
+			while (bJustADegenerate && iNextGoodTriangleSearchIndex<iTotTris)
+			{
+				const tbool bIsGood = (pTriInfos[iNextGoodTriangleSearchIndex].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE;
+				if (bIsGood) bJustADegenerate=TFALSE;
+				else ++iNextGoodTriangleSearchIndex;
+			}
+
+			t0 = t;
+			t1 = iNextGoodTriangleSearchIndex;
+			++iNextGoodTriangleSearchIndex;
+			assert(iNextGoodTriangleSearchIndex > (t+1));
+
+			// swap triangle t0 and t1
+			if (!bJustADegenerate)
+			{
+				int i=0;
+				for (i=0; i<3; i++)
+				{
+					const int index = piTriList_out[t0*3+i];
+					piTriList_out[t0*3+i] = piTriList_out[t1*3+i];
+					piTriList_out[t1*3+i] = index;
+				}
+				{
+					const STriInfo tri_info = pTriInfos[t0];
+					pTriInfos[t0] = pTriInfos[t1];
+					pTriInfos[t1] = tri_info;
+				}
+			}
+			else
+				bStillFindingGoodOnes = TFALSE;	// this is not supposed to happen
+		}
+
+		if (bStillFindingGoodOnes) ++t;
+	}
+
+	assert(bStillFindingGoodOnes);	// code will still work.
+	assert(iNrTrianglesIn == t);
+}
+
+static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris)
+{
+	int t=0, i=0;
+	// deal with degenerate triangles
+	// punishment for degenerate triangles is O(N^2)
+	for (t=iNrTrianglesIn; t<iTotTris; t++)
+	{
+		// degenerate triangles on a quad with one good triangle are skipped
+		// here but processed in the next loop
+		const tbool bSkip = (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 ? TTRUE : TFALSE;
+
+		if (!bSkip)
+		{
+			for (i=0; i<3; i++)
+			{
+				const int index1 = piTriListIn[t*3+i];
+				// search through the good triangles
+				tbool bNotFound = TTRUE;
+				int j=0;
+				while (bNotFound && j<(3*iNrTrianglesIn))
+				{
+					const int index2 = piTriListIn[j];
+					if (index1==index2) bNotFound=TFALSE;
+					else ++j;
+				}
+
+				if (!bNotFound)
+				{
+					const int iTri = j/3;
+					const int iVert = j%3;
+					const int iSrcVert=pTriInfos[iTri].vert_num[iVert];
+					const int iSrcOffs=pTriInfos[iTri].iTSpacesOffs;
+					const int iDstVert=pTriInfos[t].vert_num[i];
+					const int iDstOffs=pTriInfos[t].iTSpacesOffs;
+					
+					// copy tspace
+					psTspace[iDstOffs+iDstVert] = psTspace[iSrcOffs+iSrcVert];
+				}
+			}
+		}
+	}
+
+	// deal with degenerate quads with one good triangle
+	for (t=0; t<iNrTrianglesIn; t++)
+	{
+		// this triangle belongs to a quad where the
+		// other triangle is degenerate
+		if ( (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 )
+		{
+			SVec3 vDstP;
+			int iOrgF=-1, i=0;
+			tbool bNotFound;
+			unsigned char * pV = pTriInfos[t].vert_num;
+			int iFlag = (1<<pV[0]) | (1<<pV[1]) | (1<<pV[2]);
+			int iMissingIndex = 0;
+			if ((iFlag&2)==0) iMissingIndex=1;
+			else if ((iFlag&4)==0) iMissingIndex=2;
+			else if ((iFlag&8)==0) iMissingIndex=3;
+
+			iOrgF = pTriInfos[t].iOrgFaceNumber;
+			vDstP = GetPosition(pContext, MakeIndex(iOrgF, iMissingIndex));
+			bNotFound = TTRUE;
+			i=0;
+			while (bNotFound && i<3)
+			{
+				const int iVert = pV[i];
+				const SVec3 vSrcP = GetPosition(pContext, MakeIndex(iOrgF, iVert));
+				if (veq(vSrcP, vDstP)==TTRUE)
+				{
+					const int iOffs = pTriInfos[t].iTSpacesOffs;
+					psTspace[iOffs+iMissingIndex] = psTspace[iOffs+iVert];
+					bNotFound=TFALSE;
+				}
+				else
+					++i;
+			}
+			assert(!bNotFound);
+		}
+	}
+}

+ 145 - 0
include/mikktspace/mikktspace.h

@@ -0,0 +1,145 @@
+/** \file mikktspace/mikktspace.h
+ *  \ingroup mikktspace
+ */
+/**
+ *  Copyright (C) 2011 by Morten S. Mikkelsen
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty.  In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  1. The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  2. Altered source versions must be plainly marked as such, and must not be
+ *     misrepresented as being the original software.
+ *  3. This notice may not be removed or altered from any source distribution.
+ */
+
+#ifndef __MIKKTSPACE_H__
+#define __MIKKTSPACE_H__
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Author: Morten S. Mikkelsen
+ * Version: 1.0
+ *
+ * The files mikktspace.h and mikktspace.c are designed to be
+ * stand-alone files and it is important that they are kept this way.
+ * Not having dependencies on structures/classes/libraries specific
+ * to the program, in which they are used, allows them to be copied
+ * and used as is into any tool, program or plugin.
+ * The code is designed to consistently generate the same
+ * tangent spaces, for a given mesh, in any tool in which it is used.
+ * This is done by performing an internal welding step and subsequently an order-independent evaluation
+ * of tangent space for meshes consisting of triangles and quads.
+ * This means faces can be received in any order and the same is true for
+ * the order of vertices of each face. The generated result will not be affected
+ * by such reordering. Additionally, whether degenerate (vertices or texture coordinates)
+ * primitives are present or not will not affect the generated results either.
+ * Once tangent space calculation is done the vertices of degenerate primitives will simply
+ * inherit tangent space from neighboring non degenerate primitives.
+ * The analysis behind this implementation can be found in my master's thesis
+ * which is available for download --> http://image.diku.dk/projects/media/morten.mikkelsen.08.pdf
+ * Note that though the tangent spaces at the vertices are generated in an order-independent way,
+ * by this implementation, the interpolated tangent space is still affected by which diagonal is
+ * chosen to split each quad. A sensible solution is to have your tools pipeline always
+ * split quads by the shortest diagonal. This choice is order-independent and works with mirroring.
+ * If these have the same length then compare the diagonals defined by the texture coordinates.
+ * XNormal which is a tool for baking normal maps allows you to write your own tangent space plugin
+ * and also quad triangulator plugin.
+ */
+
+
+typedef int tbool;
+typedef struct SMikkTSpaceContext SMikkTSpaceContext;
+
+typedef struct {
+	// Returns the number of faces (triangles/quads) on the mesh to be processed.
+	int (*m_getNumFaces)(const SMikkTSpaceContext * pContext);
+
+	// Returns the number of vertices on face number iFace
+	// iFace is a number in the range {0, 1, ..., getNumFaces()-1}
+	int (*m_getNumVerticesOfFace)(const SMikkTSpaceContext * pContext, const int iFace);
+
+	// returns the position/normal/texcoord of the referenced face of vertex number iVert.
+	// iVert is in the range {0,1,2} for triangles and {0,1,2,3} for quads.
+	void (*m_getPosition)(const SMikkTSpaceContext * pContext, float fvPosOut[], const int iFace, const int iVert);
+	void (*m_getNormal)(const SMikkTSpaceContext * pContext, float fvNormOut[], const int iFace, const int iVert);
+	void (*m_getTexCoord)(const SMikkTSpaceContext * pContext, float fvTexcOut[], const int iFace, const int iVert);
+
+	// either (or both) of the two setTSpace callbacks can be set.
+	// The call-back m_setTSpaceBasic() is sufficient for basic normal mapping.
+
+	// This function is used to return the tangent and fSign to the application.
+	// fvTangent is a unit length vector.
+	// For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level.
+	// bitangent = fSign * cross(vN, tangent);
+	// Note that the results are returned unindexed. It is possible to generate a new index list
+	// But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results.
+	// DO NOT! use an already existing index list.
+	void (*m_setTSpaceBasic)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert);
+
+	// This function is used to return tangent space results to the application.
+	// fvTangent and fvBiTangent are unit length vectors and fMagS and fMagT are their
+	// true magnitudes which can be used for relief mapping effects.
+	// fvBiTangent is the "real" bitangent and thus may not be perpendicular to fvTangent.
+	// However, both are perpendicular to the vertex normal.
+	// For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level.
+	// fSign = bIsOrientationPreserving ? 1.0f : (-1.0f);
+	// bitangent = fSign * cross(vN, tangent);
+	// Note that the results are returned unindexed. It is possible to generate a new index list
+	// But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results.
+	// DO NOT! use an already existing index list.
+	void (*m_setTSpace)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fvBiTangent[], const float fMagS, const float fMagT,
+						const tbool bIsOrientationPreserving, const int iFace, const int iVert);
+} SMikkTSpaceInterface;
+
+struct SMikkTSpaceContext
+{
+	SMikkTSpaceInterface * m_pInterface;	// initialized with callback functions
+	void * m_pUserData;						// pointer to client side mesh data etc. (passed as the first parameter with every interface call)
+};
+
+// these are both thread safe!
+tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext);	// Default (recommended) fAngularThreshold is 180 degrees (which means threshold disabled)
+tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold);
+
+
+// To avoid visual errors (distortions/unwanted hard edges in lighting), when using sampled normal maps, the
+// normal map sampler must use the exact inverse of the pixel shader transformation.
+// The most efficient transformation we can possibly do in the pixel shader is
+// achieved by using, directly, the "unnormalized" interpolated tangent, bitangent and vertex normal: vT, vB and vN.
+// pixel shader (fast transform out)
+// vNout = normalize( vNt.x * vT + vNt.y * vB + vNt.z * vN );
+// where vNt is the tangent space normal. The normal map sampler must likewise use the
+// interpolated and "unnormalized" tangent, bitangent and vertex normal to be compliant with the pixel shader.
+// sampler does (exact inverse of pixel shader):
+// float3 row0 = cross(vB, vN);
+// float3 row1 = cross(vN, vT);
+// float3 row2 = cross(vT, vB);
+// float fSign = dot(vT, row0)<0 ? -1 : 1;
+// vNt = normalize( fSign * float3(dot(vNout,row0), dot(vNout,row1), dot(vNout,row2)) );
+// where vNout is the sampled normal in some chosen 3D space.
+//
+// Should you choose to reconstruct the bitangent in the pixel shader instead
+// of the vertex shader, as explained earlier, then be sure to do this in the normal map sampler also.
+// Finally, beware of quad triangulations. If the normal map sampler doesn't use the same triangulation of
+// quads as your renderer then problems will occur since the interpolated tangent spaces will differ
+// eventhough the vertex level tangent spaces match. This can be solved either by triangulating before
+// sampling/exporting or by using the order-independent choice of diagonal for splitting quads suggested earlier.
+// However, this must be used both by the sampler and your tools/rendering pipeline.
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

+ 9 - 6
libs/fmt/fmt.vcxproj

@@ -27,6 +27,7 @@
     </ProjectConfiguration>
   </ItemGroup>
   <ItemGroup>
+    <ClCompile Include="..\..\include\mikktspace\mikktspace.c" />
     <ClCompile Include="..\..\include\png\png.c" />
     <ClCompile Include="..\..\include\png\pngerror.c" />
     <ClCompile Include="..\..\include\png\pngget.c" />
@@ -127,9 +128,11 @@
     <ClCompile Include="..\..\include\zlib\trees.c" />
     <ClCompile Include="..\..\include\zlib\zutil.c" />
     <ClCompile Include="fmt.c" />
+    <ClCompile Include="mikkt.c" />
     <ClCompile Include="sha1.c" />
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="..\..\include\mikktspace\mikktspace.h" />
     <ClInclude Include="..\..\include\png\png.h" />
     <ClInclude Include="..\..\include\png\pngconf.h" />
     <ClInclude Include="..\..\include\png\pngdebug.h" />
@@ -242,37 +245,37 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
     <TargetExt>.hdll</TargetExt>
-    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis</IncludePath>
+    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis;../../include/mikktspace</IncludePath>
     <LibraryPath>$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86);$(NETFXKitsDir)Lib\um\x86;../../$(Configuration);../../include/turbojpeg/x86</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <TargetExt>.hdll</TargetExt>
-    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis</IncludePath>
+    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis;../../include/mikktspace</IncludePath>
     <LibraryPath>$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);$(NETFXKitsDir)Lib\um\x64;../../x64/$(Configuration);../../include/turbojpeg/x64</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <TargetExt>.hdll</TargetExt>
-    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis</IncludePath>
+    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis;../../include/mikktspace</IncludePath>
     <LibraryPath>$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86);$(NETFXKitsDir)Lib\um\x86;../../$(Configuration);../../include/turbojpeg/x86</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseVS2013|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <TargetExt>.hdll</TargetExt>
-    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis</IncludePath>
+    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis;../../include/mikktspace</IncludePath>
     <LibraryPath>$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86);$(NETFXKitsDir)Lib\um\x86;../../$(Configuration);../../include/turbojpeg/x86</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <TargetExt>.hdll</TargetExt>
-    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis</IncludePath>
+    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis;../../include/mikktspace</IncludePath>
     <LibraryPath>$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);$(NETFXKitsDir)Lib\um\x64;../../x64/$(Configuration);../../include/turbojpeg/x64</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseVS2013|x64'">
     <LinkIncremental>false</LinkIncremental>
     <TargetExt>.hdll</TargetExt>
-    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis</IncludePath>
+    <IncludePath>$(VC_IncludePath);$(WindowsSDK_IncludePath);../../src;../../include/turbojpeg;../../include/zlib;../../include/png;../../include/vorbis;../../include/mikktspace</IncludePath>
     <LibraryPath>$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);$(NETFXKitsDir)Lib\um\x64;../../x64/$(Configuration);../../include/turbojpeg/x64</LibraryPath>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">

+ 10 - 0
libs/fmt/fmt.vcxproj.filters

@@ -300,6 +300,10 @@
       <Filter>vorbis</Filter>
     </ClCompile>
     <ClCompile Include="sha1.c" />
+    <ClCompile Include="mikkt.c" />
+    <ClCompile Include="..\..\include\mikktspace\mikktspace.c">
+      <Filter>mikkt</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <Filter Include="turbojpeg">
@@ -314,6 +318,9 @@
     <Filter Include="vorbis">
       <UniqueIdentifier>{3f330839-1ff8-4160-8d74-6957d97e9e37}</UniqueIdentifier>
     </Filter>
+    <Filter Include="mikkt">
+      <UniqueIdentifier>{b16da6f5-be29-4433-b595-14badcd2cd06}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\include\zlib\zconf.h">
@@ -422,5 +429,8 @@
       <Filter>vorbis</Filter>
     </ClInclude>
     <ClInclude Include="sha1.h" />
+    <ClInclude Include="..\..\include\mikktspace\mikktspace.h">
+      <Filter>mikkt</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>

+ 84 - 0
libs/fmt/mikkt.c

@@ -0,0 +1,84 @@
+#define HL_NAME(n) fmt_##n
+#include <mikktspace.h>
+#include <hl.h>
+
+typedef struct {
+	hl_type *t;
+	float *buffer;
+	int stride;
+	int xpos;
+	int normalPos;
+	int uvPos;
+	float *tangents;
+	int tangentsStride;
+	int tangentPos;
+	int *indexes;
+	int indices;
+} user_info;
+
+typedef const SMikkTSpaceContext Mikkt;
+
+static int get_num_faces( Mikkt *ctx ) {
+	user_info *i = (user_info*)ctx->m_pUserData;
+	return i->indices / 3;
+}
+
+static int get_num_vertices( Mikkt *ctx, int face ) {
+	return 3;
+}
+
+static void get_position( Mikkt *ctx, float fvPosOut[], const int iFace, const int iVert ) {
+	user_info *i = (user_info*)ctx->m_pUserData;
+	int idx = iFace * 3 + iVert;
+	int v = i->indexes[idx];
+	int p = v * i->stride + i->xpos;
+	fvPosOut[0] = i->buffer[p++];
+	fvPosOut[1] = i->buffer[p++];
+	fvPosOut[2] = i->buffer[p++];
+}
+
+static void get_normal( Mikkt *ctx, float fvPosOut[], const int iFace, const int iVert ) {
+	user_info *i = (user_info*)ctx->m_pUserData;
+	int idx = iFace * 3 + iVert;
+	int v = i->indexes[idx];
+	int p = v * i->stride + i->normalPos;
+	fvPosOut[0] = i->buffer[p++];
+	fvPosOut[1] = i->buffer[p++];
+	fvPosOut[2] = i->buffer[p++];
+}
+
+static void get_tcoord( Mikkt *ctx, float fvPosOut[], const int iFace, const int iVert ) {
+	user_info *i = (user_info*)ctx->m_pUserData;
+	int idx = iFace * 3 + iVert;
+	int v = i->indexes[idx];
+	int p = v * i->stride + i->uvPos;
+	fvPosOut[0] = i->buffer[p++];
+	fvPosOut[1] = i->buffer[p++];	
+}
+
+static void set_tangent( Mikkt *ctx, const float fvTangent[], const float fSign, const int iFace, const int iVert ) {
+	user_info *i = (user_info*)ctx->m_pUserData;
+	int idx = iFace * 3 + iVert;
+	int p = idx * i->tangentsStride + i->tangentPos;
+	i->tangents[p++] = fvTangent[0];
+	i->tangents[p++] = fvTangent[1];
+	i->tangents[p++] = fvTangent[2];
+	i->tangents[p++] = fSign;
+}
+
+HL_PRIM bool HL_NAME(compute_mikkt_tangents)( user_info *inf, double threshold ) {
+	SMikkTSpaceContext ctx;
+	SMikkTSpaceInterface intf;
+	intf.m_getNumFaces = get_num_faces;
+	intf.m_getNumVerticesOfFace = get_num_vertices;
+	intf.m_getPosition = get_position;
+	intf.m_getNormal = get_normal;
+	intf.m_getTexCoord = get_tcoord;
+	intf.m_setTSpaceBasic = set_tangent;
+	intf.m_setTSpace = NULL;
+	ctx.m_pInterface = &intf;
+	ctx.m_pUserData = inf;
+	return genTangSpace(&ctx, (float)threshold);
+}
+
+DEFINE_PRIM(_BOOL, compute_mikkt_tangents, _DYN _F64);