Browse Source

Adding support for Unicode input files to most text file loaders (BVH and MD5 missing for now).
IrrXML now receives memory-mapped UTF-8 input data; its own (faulty) conversion is not used anymore.
aiStrings are explicitly UTF-8 now.
Slight refactorings and improvements.
Adding UTF-8/UTF-16 text files for ASE, OBJ, Collada, AC3D. These contain various Japanese/Chinese character sequences.
Changing assimp_view's node view to display UTF-8 multibyte sequences correctly.

git-svn-id: https://assimp.svn.sourceforge.net/svnroot/assimp/trunk@469 67173fc5-114c-0410-ac8e-9d2fd5bffc1f

aramis_acg 16 years ago
parent
commit
a251827cb9

+ 5 - 6
code/ACLoader.cpp

@@ -726,19 +726,18 @@ void AC3DImporter::InternReadFile( const std::string& pFile,
 	if( file.get() == NULL)
 		throw new ImportErrorException( "Failed to open AC3D file " + pFile + ".");
 
-	const unsigned int fileSize = (unsigned int)file->FileSize();
-
 	// allocate storage and copy the contents of the file to a memory buffer
-	std::vector<char> mBuffer2(fileSize+1);
-	file->Read(&mBuffer2[0], 1, fileSize);
-	mBuffer2[fileSize] = '\0';
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
+
 	buffer = &mBuffer2[0];
 	mNumMeshes = 0;
 
 	lights = polys = worlds = groups = 0;
 
-	if (::strncmp(buffer,"AC3D",4))
+	if (::strncmp(buffer,"AC3D",4)) {
 		throw new ImportErrorException("AC3D: No valid AC3D file, magic sequence not found");
+	}
 
 	// print the file format version to the console
 	unsigned int version = HexDigitToDecimal( buffer[4] );

+ 16 - 16
code/ASELoader.cpp

@@ -108,18 +108,13 @@ void ASEImporter::InternReadFile( const std::string& pFile,
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rb"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open ASE file " + pFile + ".");
-
-	size_t fileSize = file->FileSize();
-	if (!fileSize)
-		throw new ImportErrorException( "ASE: File is empty");
+	}
 
 	// Allocate storage and copy the contents of the file to a memory buffer
-	// (terminate it with zero)
-	std::vector<char> mBuffer2(fileSize+1);
-	file->Read( &mBuffer2[0], 1, fileSize);
-	mBuffer2[fileSize] = '\0';
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
 
 	this->mBuffer = &mBuffer2[0];
 	this->pcScene = pScene;
@@ -131,8 +126,8 @@ void ASEImporter::InternReadFile( const std::string& pFile,
 	// ------------------------------------------------------------------
 	unsigned int defaultFormat;
 	std::string::size_type s = pFile.length()-1;
-	switch (pFile.c_str()[s])
-	{
+	switch (pFile.c_str()[s])	{
+
 	case 'C':
 	case 'c':
 		defaultFormat = AI_ASE_OLD_FILE_FORMAT;
@@ -150,8 +145,8 @@ void ASEImporter::InternReadFile( const std::string& pFile,
 	// Check whether we god at least one mesh. If we did - generate
 	// materials and copy meshes. 
 	// ------------------------------------------------------------------
-	if ( !mParser->m_vMeshes.empty())
-	{
+	if ( !mParser->m_vMeshes.empty())	{
+
 		// If absolutely no material has been loaded from the file
 		// we need to generate a default material
 		GenerateDefaultMaterial();
@@ -161,12 +156,15 @@ void ASEImporter::InternReadFile( const std::string& pFile,
 		std::vector<aiMesh*> avOutMeshes;
 		avOutMeshes.reserve(mParser->m_vMeshes.size()*2);
 		for (std::vector<ASE::Mesh>::iterator i =  mParser->m_vMeshes.begin();i != mParser->m_vMeshes.end();++i)	{
-			if ((*i).bSkip)continue;
+			if ((*i).bSkip) {
+				continue;
+			}
 			BuildUniqueRepresentation(*i);
 
 			// Need to generate proper vertex normals if necessary
-			if(GenerateNormals(*i))
+			if(GenerateNormals(*i)) {
 				tookNormals = true;
+			}
 
 			// Convert all meshes to aiMesh objects
 			ConvertMeshes(*i,avOutMeshes);
@@ -181,7 +179,9 @@ void ASEImporter::InternReadFile( const std::string& pFile,
 		pScene->mNumMeshes = (unsigned int)avOutMeshes.size();
 		aiMesh** pp = pScene->mMeshes = new aiMesh*[pScene->mNumMeshes];
 		for (std::vector<aiMesh*>::const_iterator i =  avOutMeshes.begin();i != avOutMeshes.end();++i) {
-			if (!(*i)->mNumFaces)continue;
+			if (!(*i)->mNumFaces) {
+				continue;
+			}
 			*pp++ = *i;
 		}
 		pScene->mNumMeshes = (unsigned int)(pp - pScene->mMeshes);

+ 127 - 5
code/BaseImporter.cpp

@@ -195,8 +195,9 @@ void BaseImporter::SetupProperties(const Importer* pImp)
 {
 	ai_assert(size <= 16 && _magic);
 
-	if (!pIOHandler)
+	if (!pIOHandler) {
 		return false;
+	}
 
 	const char* magic = (const char*)_magic;
 	boost::scoped_ptr<IOStream> pStream (pIOHandler->Open(pFile));
@@ -207,8 +208,9 @@ void BaseImporter::SetupProperties(const Importer* pImp)
 
 		// read 'size' characters from the file
 		char data[16];
-		if(size != pStream->Read(data,1,size))
+		if(size != pStream->Read(data,1,size)) {
 			return false;
+		}
 
 		for (unsigned int i = 0; i < num; ++i) {
 			// also check against big endian versions of tokens with size 2,4
@@ -217,19 +219,22 @@ void BaseImporter::SetupProperties(const Importer* pImp)
 			if (2 == size) {
 				int16_t rev = *((int16_t*)magic);
 				ByteSwap::Swap(&rev);
-				if (*((int16_t*)data) == ((int16_t*)magic)[i] || *((int16_t*)data) == rev)
+				if (*((int16_t*)data) == ((int16_t*)magic)[i] || *((int16_t*)data) == rev) {
 					return true;
+				}
 			}
 			else if (4 == size) {
 				int32_t rev = *((int32_t*)magic);
 				ByteSwap::Swap(&rev);
-				if (*((int32_t*)data) == ((int32_t*)magic)[i] || *((int32_t*)data) == rev)
+				if (*((int32_t*)data) == ((int32_t*)magic)[i] || *((int32_t*)data) == rev) {
 					return true;
+				}
 			}
 			else {
 				// any length ... just compare
-				if(!::memcmp(magic,data,size))
+				if(!memcmp(magic,data,size)) {
 					return true;
+				}
 			}
 			magic += size;
 		}
@@ -237,6 +242,123 @@ void BaseImporter::SetupProperties(const Importer* pImp)
 	return false;
 }
 
+#include "../contrib/ConvertUTF/ConvertUTF.h"
+
+// ------------------------------------------------------------------------------------------------
+// Log a failed Unicode conversion. Only an incomplete or an illegal source
+// sequence is reported; every other result code is silently accepted.
+void ReportResult(ConversionResult res)
+{
+	switch(res) {
+	case sourceExhausted:
+		DefaultLogger::get()->error("Source ends with incomplete character sequence, Unicode transformation to UTF-8 fails");
+		break;
+
+	case sourceIllegal:
+		DefaultLogger::get()->error("Source contains illegal character sequence, Unicode transformation to UTF-8 fails");
+		break;
+
+	default:
+		// nothing to report
+		break;
+	}
+}
+
+// ------------------------------------------------------------------------------------------------
+// Convert a raw text file buffer to UTF-8, in place.
+//
+// The source encoding is detected solely from a leading byte order mark (BOM):
+//  - UTF-8 BOM:  the three BOM bytes are stripped, the rest is kept as is.
+//  - UTF-32 BOM: the data is byte-swapped if necessary and transcoded.
+//  - UTF-16 BOM: the data is byte-swapped if necessary and transcoded.
+//  - no BOM:     the buffer is left untouched.
+// The BOM itself is never part of the output. Conversion errors are logged via
+// ReportResult(); whatever was converted up to the error is still stored.
+//
+// A BOM (U+FEFF) read as a native integer yields 0xFEFF / 0x0000FEFF if the
+// file has the byte order of the host and 0xFFFE / 0xFFFE0000 if it has the
+// opposite one. The 'BE'/'LE' labels below therefore hold on little-endian hosts.
+//
+// @param data Buffer to be converted; resized to the length of the result.
+// @throws ImportErrorException if the buffer holds fewer than 8 bytes.
+void BaseImporter::ConvertToUTF8(std::vector<char>& data)
+{
+	if(data.size() < 8) {
+		throw new ImportErrorException("File is too small");
+	}
+
+	// UTF 8 with BOM
+	if((uint8_t)data[0] == 0xEF && (uint8_t)data[1] == 0xBB && (uint8_t)data[2] == 0xBF) {
+		DefaultLogger::get()->debug("Found UTF-8 BOM ...");
+
+		std::copy(data.begin()+3,data.end(),data.begin());
+		data.resize(data.size()-3);
+		return;
+	}
+
+	// UTF 32 BE with BOM
+	if(*((uint32_t*)&data.front()) == 0xFFFE0000) {
+
+		// swap the endianess of every complete code unit. Trailing bytes which
+		// do not form a full unit are left alone, we must not touch memory
+		// behind the end of the buffer.
+		uint32_t* p = (uint32_t*)&data.front();
+		for(uint32_t* const end = p + data.size()/4; p != end; ++p) {
+			AI_SWAP4P(p);
+		}
+	}
+
+	// UTF 32 LE with BOM (also reached after the swap above)
+	if(*((uint32_t*)&data.front()) == 0x0000FEFF) {
+		DefaultLogger::get()->debug("Found UTF-32 BOM ...");
+
+		// skip the BOM and stop behind the last complete code unit
+		const uint32_t* sstart = (uint32_t*)&data.front()+1;
+		const uint32_t* const send = (uint32_t*)&data.front()+data.size()/4;
+
+		// UTF-8 needs at most four bytes per code point, so the size of the
+		// input is normally sufficient. The loop is a safety net only.
+		std::vector<char> output(data.size());
+		char* dstart = &output.front();
+
+		ConversionResult result;
+		for(;;) {
+			char* const dend = &output.front()+output.size();
+
+			result = ConvertUTF32toUTF8((const UTF32**)&sstart,(const UTF32*)send,(UTF8**)&dstart,(UTF8*)dend,lenientConversion);
+			if(result != targetExhausted) {
+				break;
+			}
+
+			// ConvertUTF has advanced both pointers behind everything it was
+			// able to convert - grow the target, but keep the data written so far.
+			const size_t written = (size_t)(dstart-&output.front());
+			output.resize(output.size()*3/2+4);
+			dstart = &output.front()+written;
+		}
+
+		ReportResult(result);
+
+		// copy to output buffer.
+		const size_t outlen = (size_t)(dstart-&output.front());
+		data.assign(output.begin(),output.begin()+outlen);
+		return;
+	}
+
+	// UTF 16 BE with BOM
+	if(*((uint16_t*)&data.front()) == 0xFFFE) {
+
+		// swap the endianess of every complete code unit (see UTF-32 above)
+		uint16_t* p = (uint16_t*)&data.front();
+		for(uint16_t* const end = p + data.size()/2; p != end; ++p) {
+			ByteSwap::Swap2(p);
+		}
+	}
+
+	// UTF 16 LE with BOM (also reached after the swap above)
+	if(*((uint16_t*)&data.front()) == 0xFEFF) {
+		DefaultLogger::get()->debug("Found UTF-16 BOM ...");
+
+		// skip the BOM and stop behind the last complete code unit
+		const uint16_t* sstart = (uint16_t*)&data.front()+1;
+		const uint16_t* const send = (uint16_t*)&data.front()+data.size()/2;
+
+		// UTF-8 needs at most three bytes per UTF-16 code unit, so 1.5 times
+		// the input size is normally sufficient. The loop is a safety net only.
+		std::vector<char> output(data.size()*3/2);
+		char* dstart = &output.front();
+
+		ConversionResult result;
+		for(;;) {
+			char* const dend = &output.front()+output.size();
+
+			result = ConvertUTF16toUTF8((const UTF16**)&sstart,(const UTF16*)send,(UTF8**)&dstart,(UTF8*)dend,lenientConversion);
+			if(result != targetExhausted) {
+				break;
+			}
+
+			// grow the target, but keep the data written so far
+			const size_t written = (size_t)(dstart-&output.front());
+			output.resize(output.size()*3/2+4);
+			dstart = &output.front()+written;
+		}
+
+		ReportResult(result);
+
+		// copy to output buffer.
+		const size_t outlen = (size_t)(dstart-&output.front());
+		data.assign(output.begin(),output.begin()+outlen);
+		return;
+	}
+}
+
+// ------------------------------------------------------------------------------------------------
+// Read the whole stream into 'data', convert it to UTF-8 and terminate it
+// with a binary zero. Throws ImportErrorException if the stream is empty or
+// cannot be read completely.
+void BaseImporter::TextFileToBuffer(IOStream* stream,
+	std::vector<char>& data)
+{
+	ai_assert(NULL != stream);
+
+	const size_t numBytes = stream->FileSize();
+	if(0 == numBytes) {
+		throw new ImportErrorException("File is empty");
+	}
+
+	// ask for one extra byte up front: room for the terminator appended below
+	data.reserve(numBytes+1);
+	data.resize(numBytes);
+
+	const size_t numRead = stream->Read( &data[0], 1, numBytes);
+	if(numRead != numBytes) {
+		throw new ImportErrorException("File read error");
+	}
+
+	ConvertToUTF8(data);
+
+	// the terminating zero allows parsers to treat the buffer as a C string
+	data.push_back(0);
+}
+
 // ------------------------------------------------------------------------------------------------
 namespace Assimp
 {

+ 17 - 14
code/BaseImporter.h

@@ -201,7 +201,6 @@ public:
 	 */
 	aiScene* ReadFile( const std::string& pFile, IOSystem* pIOHandler);
 
-
 	// -------------------------------------------------------------------
 	/** Returns the error description of the last error that occured. 
 	 * @return A description of the last error that occured. An empty
@@ -211,7 +210,6 @@ public:
 		return mErrorText;
 	}
 
-
 	// -------------------------------------------------------------------
 	/** Called prior to ReadFile().
 	 * The function is a request to the importer to update its configuration
@@ -283,6 +281,7 @@ protected:
 	virtual void InternReadFile( const std::string& pFile, 
 		aiScene* pScene, IOSystem* pIOHandler) = 0;
 
+public: // static utilities
 
 	// -------------------------------------------------------------------
 	/** A utility for CanRead().
@@ -345,20 +344,24 @@ protected:
 		unsigned int offset = 0,
 		unsigned int size   = 4);
 
-#if 0 /** TODO **/
 	// -------------------------------------------------------------------
 	/** An utility for all text file loaders. It converts a file to our
-	*  ASCII/UTF8 character set. Special unicode characters are lost.
-	*
-	*  @param buffer Input buffer. Needn't be terminated with zero.
-	 *  @param length Length of the input buffer, in bytes. Receives the
-	 *    number of output characters, excluding the terminal char.
-	 *  @return true if the source format did not match our internal
-	 *    format so it was converted.
-	 */
-	static bool ConvertToUTF8(const char* buffer, 
-		unsigned int& length);
-#endif
+	 *   UTF8 character set. Errors are reported, but ignored.
+	 *
+	 *  @param data File buffer to be converted to UTF8 data. The buffer 
+	 *  is resized as appropriate. */
+	static void ConvertToUTF8(std::vector<char>& data);
+
+	// -------------------------------------------------------------------
+	/** Utility for text file loaders which copies the contents of the
+	 *  file into a memory buffer and converts it to our UTF8
+	 *  representation.
+	 *  @param stream Stream to read from. 
+	 *  @param data Output buffer to be resized and filled with the
+	 *   converted text file data. The buffer is terminated with
+	 *   a binary 0. */
+	static void TextFileToBuffer(IOStream* stream,
+		std::vector<char>& data);
 
 protected:
 

+ 6 - 0
code/CMakeLists.txt

@@ -298,6 +298,11 @@ SOURCE_GROUP( IrrXML FILES
 	../contrib/irrXML/irrXML.h
 )
 
+SOURCE_GROUP( ConvertUTF FILES
+	../contrib/ConvertUTF/ConvertUTF.h
+	../contrib/ConvertUTF/ConvertUTF.c
+)
+
 SOURCE_GROUP( zlib FILES
 	../contrib/zlib/adler32.c
 	../contrib/zlib/compress.c
@@ -567,6 +572,7 @@ ADD_LIBRARY( assimp SHARED
 	../contrib/zlib/zlib.h
 	../contrib/zlib/zutil.c
 	../contrib/zlib/zutil.h
+	../contrib/ConvertUTF/ConvertUTF.c
 )
 ADD_DEFINITIONS(-DASSIMP_BUILD_DLL_EXPORT)
 

+ 4 - 6
code/CSMLoader.cpp

@@ -103,15 +103,13 @@ void CSMImporter::InternReadFile( const std::string& pFile,
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rb"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open CSM file " + pFile + ".");
-
-	size_t fileSize = file->FileSize();
+	}
 
 	// allocate storage and copy the contents of the file to a memory buffer
-	std::vector<char> mBuffer2(fileSize+1);
-	file->Read(&mBuffer2[0], 1, fileSize);mBuffer2[fileSize] = '\0';
-
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
 	const char* buffer = &mBuffer2[0];
 
 	aiAnimation* anim = new aiAnimation();

+ 6 - 7
code/DXFLoader.cpp

@@ -161,15 +161,14 @@ void DXFImporter::InternReadFile( const std::string& pFile,
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open DXF file " + pFile + "");
+	}
 
 	// read the contents of the file in a buffer
-	size_t m = file->FileSize();
-	std::vector<char> buffer2(m+1);
+	std::vector<char> buffer2;
+	TextFileToBuffer(file.get(),buffer2);
 	buffer = &buffer2[0];
-	file->Read( &buffer2[0], m,1);
-	buffer2[m] = '\0';
 
 	bRepeat = false;
 	mDefaultLayer = NULL;
@@ -216,7 +215,7 @@ void DXFImporter::InternReadFile( const std::string& pFile,
 		throw new ImportErrorException("DXF: this file contains no 3d data");
 
 	pScene->mMeshes = new aiMesh*[ pScene->mNumMeshes ];
-	m = 0;
+	unsigned int m = 0;
 	for (std::vector<LayerInfo>::const_iterator it = mLayers.begin(),end = mLayers.end();it != end;++it) {
 		if ((*it).vPositions.empty()) {
 			continue;
@@ -288,7 +287,7 @@ void DXFImporter::InternReadFile( const std::string& pFile,
 		for (m = 0; m < pScene->mRootNode->mNumChildren;++m)	{
 			aiNode* p = pScene->mRootNode->mChildren[m] = new aiNode();
 			p->mName.length = ::strlen( mLayers[m].name );
-			::strcpy(p->mName.data, mLayers[m].name);
+			strcpy(p->mName.data, mLayers[m].name);
 
 			p->mMeshes = new unsigned int[p->mNumMeshes = 1];
 			p->mMeshes[0] = m;

+ 4 - 4
code/LWSLoader.cpp

@@ -469,13 +469,13 @@ void LWSImporter::InternReadFile( const std::string& pFile, aiScene* pScene,
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rb"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open LWS file " + pFile + ".");
+	}
 
 	// Allocate storage and copy the contents of the file to a memory buffer
-	const size_t fileSize = file->FileSize();
-	std::vector< char > mBuffer(fileSize);
-	file->Read( &mBuffer[0], 1, fileSize);
+	std::vector< char > mBuffer;
+	TextFileToBuffer(file.get(),mBuffer);
 	
 	// Parse the file structure
 	LWS::Element root; const char* dummy = &mBuffer[0];

+ 6 - 11
code/NFFLoader.cpp

@@ -117,8 +117,7 @@ void NFFImporter::LoadNFF2MaterialTable(std::vector<ShadingInfo>& output,
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( path, "rb"));
 
 	// Check whether we can read from the file
-	if( !file.get())
-	{
+	if( !file.get())	{
 		DefaultLogger::get()->error("NFF2: Unable to open material library " + path + ".");
 		return;
 	}
@@ -129,16 +128,14 @@ void NFFImporter::LoadNFF2MaterialTable(std::vector<ShadingInfo>& output,
 	// allocate storage and copy the contents of the file to a memory buffer
 	// (terminate it with zero)
 	std::vector<char> mBuffer2(m+1);
-	file->Read(&mBuffer2[0],m,1);
+	TextFileToBuffer(file.get(),mBuffer2);
 	const char* buffer = &mBuffer2[0];
-	mBuffer2[m] = '\0';
 
 	// First of all: remove all comments from the file
 	CommentRemover::RemoveLineComments("//",&mBuffer2[0]);
 
 	// The file should start with the magic sequence "mat"
-	if (!TokenMatch(buffer,"mat",3))
-	{
+	if (!TokenMatch(buffer,"mat",3))	{
 		DefaultLogger::get()->error("NFF2: Not a valid material library " + path + ".");
 		return;
 	}
@@ -229,13 +226,11 @@ void NFFImporter::InternReadFile( const std::string& pFile,
 
 	// allocate storage and copy the contents of the file to a memory buffer
 	// (terminate it with zero)
-	std::vector<char> mBuffer2(m+1);
-	file->Read(&mBuffer2[0],m,1);
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
 	const char* buffer = &mBuffer2[0];
-	mBuffer2[m] = '\0';
 
-	// mesh arrays - separate here to make the handling of
-	// the pointers below easier.
+	// mesh arrays - separate here to make the handling of the pointers below easier.
 	std::vector<MeshInfo> meshes;
 	std::vector<MeshInfo> meshesWithNormals;
 	std::vector<MeshInfo> meshesWithUVCoords;

+ 9 - 9
code/OFFLoader.cpp

@@ -90,25 +90,25 @@ void OFFImporter::GetExtensionList(std::string& append)
 // ------------------------------------------------------------------------------------------------
 // Imports the given file into the given scene structure. 
 void OFFImporter::InternReadFile( const std::string& pFile, 
-								 aiScene* pScene, IOSystem* pIOHandler)
+	aiScene* pScene, IOSystem* pIOHandler)
 {
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rb"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open OFF file " + pFile + ".");
-
-	unsigned int fileSize = (unsigned int)file->FileSize();
-
+	}
+	
 	// allocate storage and copy the contents of the file to a memory buffer
-	std::vector<char> mBuffer2(fileSize+1);
-	file->Read(&mBuffer2[0], 1, fileSize);
-	mBuffer2[fileSize] = '\0';
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
 	const char* buffer = &mBuffer2[0];
 
 	char line[4096];
 	GetNextLine(buffer,line);
-	if ('O' == line[0])GetNextLine(buffer,line); // skip the 'OFF' line
+	if ('O' == line[0]) {
+		GetNextLine(buffer,line); // skip the 'OFF' line
+	}
 
 	const char* sz = line; SkipSpaces(&sz);
 	const unsigned int numVertices = strtol10(sz,&sz);SkipSpaces(&sz);

+ 4 - 4
code/ObjFileImporter.cpp

@@ -101,10 +101,7 @@ void ObjFileImporter::InternReadFile( const std::string& pFile, aiScene* pScene,
 		throw new ImportErrorException( "OBJ-file is too small.");
 
 	// Allocate buffer and read file into it
-	m_Buffer.resize( fileSize + 1 );
-	m_Buffer[ fileSize ] = '\0';
-	const size_t readsize = file->Read( &m_Buffer.front(), sizeof(char), fileSize );
-	assert( readsize == fileSize );
+	TextFileToBuffer(file.get(),m_Buffer);
 
 	//
 	std::string strDirectory( 1, io.getOsSeparator() ), strModelName;
@@ -124,6 +121,9 @@ void ObjFileImporter::InternReadFile( const std::string& pFile, aiScene* pScene,
 
 	// And create the proper return structures out of it
 	CreateDataFromImport(parser.GetModel(), pScene);
+
+	// Clean up allocated storage for the next import 
+	m_Buffer.clear();
 }
 
 // ------------------------------------------------------------------------------------------------

+ 2 - 4
code/ObjFileParser.cpp

@@ -452,10 +452,8 @@ void ObjFileParser::getMaterialLib()
 	}
 
 	// Import material library data from file
-	size_t size = pFile->FileSize();
-	std::vector<char> buffer( size + 1 );
-	buffer[ size ] = '\0';
-	pFile->Read( &buffer[ 0 ], sizeof( char ), size );
+	std::vector<char> buffer;
+	BaseImporter::TextFileToBuffer(pFile,buffer);
 	io->Close( pFile );
 
 	// Importing the material library 

+ 7 - 14
code/PlyLoader.cpp

@@ -87,27 +87,20 @@ void PLYImporter::GetExtensionList(std::string& append)
 
 // ------------------------------------------------------------------------------------------------
 // Imports the given file into the given scene structure. 
-void PLYImporter::InternReadFile( 
-								 const std::string& pFile, aiScene* pScene, IOSystem* pIOHandler)
+void PLYImporter::InternReadFile( const std::string& pFile, 
+	aiScene* pScene, IOSystem* pIOHandler)
 {
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open PLY file " + pFile + ".");
-
-	// check whether the ply file is large enough to contain
-	// at least the file header
-	size_t fileSize = file->FileSize();
-	if( fileSize < 10)
-		throw new ImportErrorException( "PLY File is too small.");
+	}
 
 	// allocate storage and copy the contents of the file to a memory buffer
-	// (terminate it with zero)
-	std::vector<unsigned char> mBuffer2(fileSize+1);
-	file->Read( &mBuffer2[0], 1, fileSize);
-	mBuffer = &mBuffer2[0];
-	mBuffer[fileSize] = '\0';
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
+	mBuffer = (unsigned char*)&mBuffer2[0];
 
 	// the beginning of the file must be PLY - magic, magic
 	if (mBuffer[0] != 'P' && mBuffer[0] != 'p' ||

+ 4 - 7
code/RawLoader.cpp

@@ -85,17 +85,14 @@ void RAWImporter::InternReadFile( const std::string& pFile,
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rb"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
+	if( file.get() == NULL) {
 		throw new ImportErrorException( "Failed to open RAW file " + pFile + ".");
-
-	unsigned int fileSize = (unsigned int)file->FileSize();
+	}
 
 	// allocate storage and copy the contents of the file to a memory buffer
 	// (terminate it with zero)
-	std::vector<char> mBuffer2(fileSize+1);
-	
-	file->Read(&mBuffer2[0], 1, fileSize);
-	mBuffer2[fileSize] = '\0';
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
 	const char* buffer = &mBuffer2[0];
 
 	// list of groups loaded from the file

+ 2 - 4
code/SMDLoader.cpp

@@ -99,8 +99,7 @@ void SMDImporter::InternReadFile(
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rt"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
-	{
+	if( file.get() == NULL)	{
 		throw new ImportErrorException( "Failed to open SMD/VTA file " + pFile + ".");
 	}
 
@@ -110,8 +109,7 @@ void SMDImporter::InternReadFile(
 	this->pScene = pScene;
 
 	std::vector<char> buff(iFileSize+1);
-	file->Read( &buff[0], 1, iFileSize);
-	buff[iFileSize] = '\0';
+	TextFileToBuffer(file.get(),buff);
 	mBuffer = &buff[0];
 
 	iSmallestFrame = (1 << 31);

+ 61 - 55
code/STLLoader.cpp

@@ -87,25 +87,22 @@ void STLImporter::GetExtensionList(std::string& append)
 
 // ------------------------------------------------------------------------------------------------
 // Imports the given file into the given scene structure. 
-void STLImporter::InternReadFile( 
-								 const std::string& pFile, aiScene* pScene, IOSystem* pIOHandler)
+void STLImporter::InternReadFile( const std::string& pFile, 
+	aiScene* pScene, IOSystem* pIOHandler)
 {
 	boost::scoped_ptr<IOStream> file( pIOHandler->Open( pFile, "rb"));
 
 	// Check whether we can read from the file
-	if( file.get() == NULL)
-	{
+	if( file.get() == NULL)	{
 		throw new ImportErrorException( "Failed to open STL file " + pFile + ".");
 	}
 
-	this->fileSize = (unsigned int)file->FileSize();
+	fileSize = (unsigned int)file->FileSize();
 
 	// allocate storage and copy the contents of the file to a memory buffer
 	// (terminate it with zero)
-	std::vector<char> mBuffer2(fileSize+1);
-	
-	file->Read(&mBuffer2[0], 1, fileSize);
-	mBuffer2[fileSize] = '\0';
+	std::vector<char> mBuffer2;
+	TextFileToBuffer(file.get(),mBuffer2);
 
 	this->pScene = pScene;
 	this->mBuffer = &mBuffer2[0];
@@ -129,18 +126,20 @@ void STLImporter::InternReadFile(
 
 	// check whether the file starts with 'solid' -
 	// in this case we can simply assume it IS a text file. finished.
-	if (!::strncmp(mBuffer,"solid",5))
-		this->LoadASCIIFile();
-	else bMatClr = this->LoadBinaryFile();
+	if (!::strncmp(mBuffer,"solid",5)) {
+		LoadASCIIFile();
+	}
+	else bMatClr = LoadBinaryFile();
 
 	// now copy faces
 	pMesh->mFaces = new aiFace[pMesh->mNumFaces];
-	for (unsigned int i = 0, p = 0; i < pMesh->mNumFaces;++i)
-	{
+	for (unsigned int i = 0, p = 0; i < pMesh->mNumFaces;++i)	{
+
 		aiFace& face = pMesh->mFaces[i];
 		face.mIndices = new unsigned int[face.mNumIndices = 3];
-		for (unsigned int o = 0; o < 3;++o,++p)
+		for (unsigned int o = 0; o < 3;++o,++p) {
 			face.mIndices[o] = p;
+		}
 	}
 
 	// create a single default material - everything white, as we have vertex colors
@@ -150,7 +149,9 @@ void STLImporter::InternReadFile(
 	pcMat->AddProperty(&s, AI_MATKEY_NAME);
 
 	aiColor4D clrDiffuse(1.0f,1.0f,1.0f,1.0f);
-	if (bMatClr)clrDiffuse = this->clrColorDefault;
+	if (bMatClr) {
+		clrDiffuse = clrColorDefault;
+	}
 	pcMat->AddProperty(&clrDiffuse,1,AI_MATKEY_COLOR_DIFFUSE);
 	pcMat->AddProperty(&clrDiffuse,1,AI_MATKEY_COLOR_SPECULAR);
 	clrDiffuse = aiColor4D(0.05f,0.05f,0.05f,1.0f);
@@ -169,14 +170,16 @@ void STLImporter::LoadASCIIFile()
 	const char* sz = mBuffer + 5; // skip the "solid"
 	SkipSpaces(&sz);
 	const char* szMe = sz;
-	while (!::IsSpaceOrNewLine(*sz))sz++;
-	unsigned int temp;
+	while (!::IsSpaceOrNewLine(*sz)) {
+		sz++;
+	}
 
+	size_t temp;
 	// setup the name of the node
-	if ((temp = (unsigned int)(sz-szMe)))
-	{
+	if ((temp = (size_t)(sz-szMe)))	{
+
 		pScene->mRootNode->mName.length = temp;
-		::memcpy(pScene->mRootNode->mName.data,szMe,temp);
+		memcpy(pScene->mRootNode->mName.data,szMe,temp);
 		pScene->mRootNode->mName.data[temp] = '\0';
 	}
 	else pScene->mRootNode->mName.Set("<STL_ASCII>");
@@ -185,7 +188,7 @@ void STLImporter::LoadASCIIFile()
 	// assume we'll need 160 bytes for each face
 	pMesh->mNumVertices = ( pMesh->mNumFaces = fileSize / 160 ) * 3;
 	pMesh->mVertices = new aiVector3D[pMesh->mNumVertices];
-	pMesh->mNormals = new aiVector3D[pMesh->mNumVertices];
+	pMesh->mNormals  = new aiVector3D[pMesh->mNumVertices];
 	
 	unsigned int curFace = 0, curVertex = 3;
 	while (true)
@@ -198,11 +201,12 @@ void STLImporter::LoadASCIIFile()
 			break;
 		}
 		// facet normal -0.13 -0.13 -0.98
-		if (!::strncmp(sz,"facet",5) && ::IsSpaceOrNewLine(*(sz+5)))
-		{
-			if (3 != curVertex)DefaultLogger::get()->warn("STL: A new facet begins but the old is not yet complete");
-			if (pMesh->mNumFaces == curFace)
-			{
+		if (!strncmp(sz,"facet",5) && IsSpaceOrNewLine(*(sz+5)))	{
+
+			if (3 != curVertex) {
+				DefaultLogger::get()->warn("STL: A new facet begins but the old is not yet complete");
+			}
+			if (pMesh->mNumFaces == curFace)	{
 				// need to resize the arrays, our size estimate was wrong
 				unsigned int iNeededSize = (unsigned int)(sz-mBuffer) / pMesh->mNumFaces;
 				if (iNeededSize <= 160)iNeededSize >>= 1; // prevent endless looping
@@ -210,11 +214,11 @@ void STLImporter::LoadASCIIFile()
 				add += add >> 3; // add 12.5% as buffer
 				iNeededSize = (pMesh->mNumFaces + add)*3;
 				aiVector3D* pv = new aiVector3D[iNeededSize];
-				::memcpy(pv,pMesh->mVertices,pMesh->mNumVertices*sizeof(aiVector3D));
+				memcpy(pv,pMesh->mVertices,pMesh->mNumVertices*sizeof(aiVector3D));
 				delete[] pMesh->mVertices;
 				pMesh->mVertices = pv;
 				pv = new aiVector3D[iNeededSize];
-				::memcpy(pv,pMesh->mNormals,pMesh->mNumVertices*sizeof(aiVector3D));
+				memcpy(pv,pMesh->mNormals,pMesh->mNumVertices*sizeof(aiVector3D));
 				delete[] pMesh->mNormals;
 				pMesh->mNormals = pv;
 
@@ -226,8 +230,7 @@ void STLImporter::LoadASCIIFile()
 			sz += 6;
 			curVertex = 0;
 			SkipSpaces(&sz);
-			if (::strncmp(sz,"normal",6))
-			{
+			if (strncmp(sz,"normal",6))	{
 				DefaultLogger::get()->warn("STL: a facet normal vector was expected but not found");
 			}
 			else
@@ -244,10 +247,9 @@ void STLImporter::LoadASCIIFile()
 			}
 		}
 		// vertex 1.50000 1.50000 0.00000
-		else if (!::strncmp(sz,"vertex",6) && ::IsSpaceOrNewLine(*(sz+6)))
+		else if (!strncmp(sz,"vertex",6) && ::IsSpaceOrNewLine(*(sz+6)))
 		{
-			if (3 == curVertex)
-			{
+			if (3 == curVertex)	{
 				DefaultLogger::get()->error("STL: a facet with more than 3 vertices has been found");
 			}
 			else
@@ -262,17 +264,17 @@ void STLImporter::LoadASCIIFile()
 				sz = fast_atof_move(sz, (float&)vn->z ); 
 			}
 		}
-		else if (!::strncmp(sz,"endsolid",8))
-		{
+		else if (!::strncmp(sz,"endsolid",8))	{
 			// finished!
 			break;
 		}
 		// else skip the whole identifier
-		else while (!::IsSpaceOrNewLine(*sz))++sz;
+		else while (!::IsSpaceOrNewLine(*sz)) {
+			++sz;
+		}
 	}
 
-	if (!curFace)
-	{
+	if (!curFace)	{
 		pMesh->mNumFaces = 0;
 		throw new ImportErrorException("STL: ASCII file is empty or invalid; no data loaded");
 	}
@@ -280,31 +282,32 @@ void STLImporter::LoadASCIIFile()
 	pMesh->mNumVertices = curFace*3;
 	// we are finished!
 }
+
 // ------------------------------------------------------------------------------------------------
 // Read a binary STL file
 bool STLImporter::LoadBinaryFile()
 {
 	// skip the first 80 bytes
-	if (fileSize < 84)
+	if (fileSize < 84) {
 		throw new ImportErrorException("STL: file is too small for the header");
-
+	}
 	bool bIsMaterialise = false;
 
 	// search for an occurence of "COLOR=" in the header
 	const char* sz2 = (const char*)mBuffer;
 	const char* const szEnd = sz2+80;
-	while (sz2 < szEnd)
-	{
+	while (sz2 < szEnd)	{
+
 		if ('C' == *sz2++ && 'O' == *sz2++ && 'L' == *sz2++ &&
-			'O' == *sz2++ && 'R' == *sz2++ && '=' == *sz2++)
-		{
+			'O' == *sz2++ && 'R' == *sz2++ && '=' == *sz2++)	{
+
 			// read the default vertex color for facets
 			bIsMaterialise = true;
 			DefaultLogger::get()->info("STL: Taking code path for Materialise files");
-			this->clrColorDefault.r = (*sz2++) / 255.0f;
-			this->clrColorDefault.g = (*sz2++) / 255.0f;
-			this->clrColorDefault.b = (*sz2++) / 255.0f;
-			this->clrColorDefault.a = (*sz2++) / 255.0f;
+			clrColorDefault.r = (*sz2++) / 255.0f;
+			clrColorDefault.g = (*sz2++) / 255.0f;
+			clrColorDefault.b = (*sz2++) / 255.0f;
+			clrColorDefault.a = (*sz2++) / 255.0f;
 			break;
 		}
 	}
@@ -317,10 +320,13 @@ bool STLImporter::LoadBinaryFile()
 	pMesh->mNumFaces = *((uint32_t*)sz);
 	sz += 4;
 
-	if (fileSize < 84 + pMesh->mNumFaces*50)
-		throw new ImportErrorException("STL: file is too small to keep all facets");
-	if (!pMesh->mNumFaces)
+	if (fileSize < 84 + pMesh->mNumFaces*50) {
+		throw new ImportErrorException("STL: file is too small to hold all facets");
+	}
+
+	if (!pMesh->mNumFaces) {
 		throw new ImportErrorException("STL: file is empty. There are no facets defined");
+	}
 
 	pMesh->mNumVertices = pMesh->mNumFaces*3;
 
@@ -328,9 +334,9 @@ bool STLImporter::LoadBinaryFile()
 	vp = pMesh->mVertices = new aiVector3D[pMesh->mNumVertices];
 	vn = pMesh->mNormals = new aiVector3D[pMesh->mNumVertices];
 
-	for (unsigned int i = 0; i < pMesh->mNumFaces;++i)
-	{
-		// NOTE: Blender sometimes writes empty normals this is not
+	for (unsigned int i = 0; i < pMesh->mNumFaces;++i)	{
+
+		// NOTE: Blender sometimes writes empty normals ... this is not
 		// our fault ... the RemoveInvalidData helper step should fix that
 		*vn = *((aiVector3D*)sz);
 		sz += sizeof(aiVector3D);

+ 2 - 5
code/UnrealLoader.cpp

@@ -217,11 +217,8 @@ void UnrealImporter::InternReadFile( const std::string& pFile,
 	boost::scoped_ptr<IOStream> pb (pIOHandler->Open(uc_path));
 	if (pb.get())	{
 
-		size_t s = pb->FileSize();
-		std::vector<char> _data(s+1);
-		pb->Read(&_data[0],s,1);
-
-		_data[s] = 0;
+		std::vector<char> _data;
+		TextFileToBuffer(pb.get(),_data);
 		const char* data = &_data[0];
 
 		std::vector< std::pair< std::string,std::string > > tempTextures;

+ 2 - 0
code/XFileImporter.cpp

@@ -96,8 +96,10 @@ void XFileImporter::InternReadFile( const std::string& pFile, aiScene* pScene, I
 	if( fileSize < 16)
 		throw new ImportErrorException( "XFile is too small.");
 
+	// in the hope that binary files will never start with a BOM ...
 	mBuffer.resize( fileSize);
 	file->Read( &mBuffer.front(), 1, fileSize);
+	ConvertToUTF8(mBuffer);
 
 	// parse the file into a temporary representation
 	XFileParser parser( mBuffer);

+ 2 - 1
code/XFileParser.cpp

@@ -249,8 +249,9 @@ XFileParser::XFileParser( const std::vector<char>& pBuffer)
 	ParseFile();
 
 	// filter the imported hierarchy for some degenerated cases
-	if( mScene->mRootNode)
+	if( mScene->mRootNode) {
 		FilterHierarchy( mScene->mRootNode);
+	}
 }
 
 // ------------------------------------------------------------------------------------------------

+ 77 - 12
code/irrXMLWrapper.h

@@ -1,3 +1,42 @@
+/*
+Open Asset Import Library (ASSIMP)
+----------------------------------------------------------------------
+
+Copyright (c) 2006-2008, ASSIMP Development Team
+All rights reserved.
+
+Redistribution and use of this software in source and binary forms, 
+with or without modification, are permitted provided that the 
+following conditions are met:
+
+* Redistributions of source code must retain the above
+copyright notice, this list of conditions and the
+following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the
+following disclaimer in the documentation and/or other
+materials provided with the distribution.
+
+* Neither the name of the ASSIMP team, nor the names of its
+contributors may be used to endorse or promote products
+derived from this software without specific prior
+written permission of the ASSIMP Development Team.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+----------------------------------------------------------------------
+*/
 
 #ifndef INCLUDED_AI_IRRXML_WRAPPER
 #define INCLUDED_AI_IRRXML_WRAPPER
@@ -5,43 +44,69 @@
 // some long includes ....
 #include "./../contrib/irrXML/irrXML.h"
 #include "./../include/IOStream.h"
-
-namespace Assimp 
-{
+namespace Assimp	{
 
 // ---------------------------------------------------------------------------------
 /** @brief Utility class to make IrrXML work together with our custom IO system
- *
- *  See the IrrXML docs for more details.
- */
-class CIrrXML_IOStreamReader : public irr::io::IFileReadCallBack
+ *  See the IrrXML docs for more details.*/
+class CIrrXML_IOStreamReader 
+	: public irr::io::IFileReadCallBack
 {
 public:
 
+	// ----------------------------------------------------------------------------------
 	//! Construction from an existing IOStream
 	CIrrXML_IOStreamReader(IOStream* _stream)
 		: stream (_stream)
-	{}
+		, t (0) // read cursor into 'data'; read() advances it
+	{
+
+		// Read the whole stream into 'data' and convert it to UTF8. IrrXML provides its
+		// own conversion, which is merely a cast from uintNN_t to uint8_t. Thus,
+		// it is not suitable for our purposes and we have to do it BEFORE IrrXML
+		// gets the buffer. Sadly, this forces us to hold the whole file in
+		// memory. From here on read() and getSize() serve the converted buffer only.
+
+		data.resize(stream->FileSize());
+		stream->Read(&data[0],data.size(),1); // NOTE(review): &data[0] is undefined when FileSize() is 0, and the item count returned by Read() is ignored - confirm callers never pass empty/short streams
+
+		BaseImporter::ConvertToUTF8(data); // may change data.size(); getSize() reports the converted size
+	}
 
+	// ----------------------------------------------------------------------------------
 	//! Virtual destructor
 	virtual ~CIrrXML_IOStreamReader() {};
 
+	// ----------------------------------------------------------------------------------
 	//!   Reads an amount of bytes from the file.
 	/**  @param buffer:       Pointer to output buffer.
 	 *   @param sizeToRead:   Amount of bytes to read 
-	 *   @return              Returns how much bytes were read.
-	 */
+	 *   @return              Returns how many bytes were read.  */
 	virtual int read(void* buffer, int sizeToRead)	{
-		return (int)stream->Read(buffer,1,sizeToRead);
+		if(sizeToRead<0) { // negative requests are treated as "nothing to read"
+			return 0;
+		}
+		if(t+sizeToRead>data.size()) { // clamp the request to the bytes left after the cursor
+			sizeToRead = data.size()-t;
+		}
+
+		memcpy(buffer,&data.front()+t,sizeToRead); // NOTE(review): data.front() is undefined on an empty buffer even when sizeToRead was clamped to 0
+
+		t += sizeToRead; // advance the cursor past the bytes just handed out
+		return sizeToRead; // number of bytes copied (0 once the buffer is exhausted)
 	}
 
+	// ----------------------------------------------------------------------------------
 	//! Returns size of file in bytes
 	virtual int getSize()	{
-		return (int)stream->FileSize();
+		return (int)data.size();
 	}
 
 private:
 	IOStream* stream;
+	std::vector<char> data;
+	size_t t;
+
 }; // ! class CIrrXML_IOStreamReader
 
 } // ! Assimp

+ 2 - 4
code/makefile

@@ -1,6 +1,3 @@
-
-# UNTESTED!!!!
-
 # Makefile for Open Asset Import Library (GNU-make)
 # [email protected]
 
@@ -23,8 +20,9 @@ OBJECTS   := $(patsubst %.cpp,%.o,  $(wildcard *.cpp))
 OBJECTS   += $(patsubst %.cpp,%.o,  $(wildcard extra/*.cpp)) 
 OBJECTS   += $(patsubst %.cpp,%.o,  $(wildcard ./../contrib/irrXML/*.cpp)) 
 
-# C object files (mainly from zlib)
+# C object files
 OBJECTSC  := $(patsubst %.c,%.oc,   $(wildcard ./../contrib/zlib/*.c))
+OBJECTSC  += $(patsubst %.c,%.oc,   $(wildcard ./../contrib/ConvertUTF/*.c))
 
 # Include flags for gcc
 INCLUDEFLAGS =

+ 2 - 1
code/makefile.mingw

@@ -23,8 +23,9 @@ OBJECTS   := $(patsubst %.cpp,%.o,  $(wildcard *.cpp))
 OBJECTS   += $(patsubst %.cpp,%.o,  $(wildcard extra/*.cpp)) 
 OBJECTS   += $(patsubst %.cpp,%.o,  $(wildcard ./../contrib/irrXML/*.cpp)) 
 
-# C object files (mainly from zlib)
+# C object files 
 OBJECTSC  := $(patsubst %.c,%.oc,   $(wildcard ./../contrib/zlib/*.c))
+OBJECTSC  += $(patsubst %.c,%.oc,   $(wildcard ./../contrib/ConvertUTF/*.c))
 
 # Include flags for gcc
 INCLUDEFLAGS =

+ 539 - 0
contrib/ConvertUTF/ConvertUTF.c

@@ -0,0 +1,539 @@
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8. Source code file.
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+    Sept 2001: fixed const & error conditions per
+	mods suggested by S. Parent & A. Lillich.
+    June 2002: Tim Dodd added detection and handling of incomplete
+	source sequences, enhanced error detection, added casts
+	to eliminate compiler warnings.
+    July 2003: slight mods to back out aggressive FFFE detection.
+    Jan 2004: updated switches in from-UTF8 conversions.
+    Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+
+    See the header file "ConvertUTF.h" for complete documentation.
+
+------------------------------------------------------------------------ */
+
+
+#include "ConvertUTF.h"
+#ifdef CVTUTF_DEBUG
+#include <stdio.h>
+#endif
+
+static const int halfShift  = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+#define UNI_SUR_HIGH_START  (UTF32)0xD800
+#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
+#define UNI_SUR_LOW_START   (UTF32)0xDC00
+#define UNI_SUR_LOW_END     (UTF32)0xDFFF
+#define false	   0
+#define true	    1
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF32toUTF16
+ *
+ * Converts the UTF-32 values between *sourceStart and sourceEnd to UTF-16,
+ * writing into the buffer between *targetStart and targetEnd. On return
+ * *sourceStart and *targetStart are set to the positions reached.
+ *
+ * Returns conversionOK, targetExhausted (target buffer full; source is
+ * backed up when a surrogate pair did not fit) or sourceIllegal.
+ * Values in the surrogate range are rejected in strict mode and replaced
+ * by UNI_REPLACEMENT_CHAR otherwise.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF16 (
+	const UTF32** sourceStart, const UTF32* sourceEnd, 
+	UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+	UTF32 ch;
+	if (target >= targetEnd) {
+	    result = targetExhausted; break;
+	}
+	ch = *source++;
+	if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+	    /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
+	    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+		if (flags == strictConversion) {
+		    --source; /* return to the illegal value itself */
+		    result = sourceIllegal;
+		    break;
+		} else {
+		    *target++ = UNI_REPLACEMENT_CHAR;
+		}
+	    } else {
+		*target++ = (UTF16)ch; /* normal case */
+	    }
+	} else if (ch > UNI_MAX_LEGAL_UTF32) {
+	    if (flags == strictConversion) {
+		result = sourceIllegal; /* NOTE(review): unlike the surrogate case above there is no --source and no break here, so the loop keeps converting and source does not point at the illegal value on return */
+	    } else {
+		*target++ = UNI_REPLACEMENT_CHAR;
+	    }
+	} else {
+	    /* target is a character in range 0x10000 - 0x10FFFF and needs a surrogate pair (two UTF-16 units). */
+	    if (target + 1 >= targetEnd) {
+		--source; /* Back up source pointer! */
+		result = targetExhausted; break;
+	    }
+	    ch -= halfBase;
+	    *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); /* upper 10 bits -> high surrogate */
+	    *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); /* lower 10 bits -> low surrogate */
+	}
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF16toUTF32
+ *
+ * Converts the UTF-16 units between *sourceStart and sourceEnd to UTF-32,
+ * writing into the buffer between *targetStart and targetEnd. On return
+ * *sourceStart and *targetStart are set to the positions reached.
+ *
+ * Surrogate pairs are combined into one value. A high surrogate that is
+ * the last unit of the source yields sourceExhausted. In strict mode an
+ * unpaired high or low surrogate yields sourceIllegal; otherwise the
+ * surrogate value is written to the target unchanged. A full target
+ * yields targetExhausted with source backed up to the unit(s) not stored.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF32 (
+	const UTF16** sourceStart, const UTF16* sourceEnd, 
+	UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF32* target = *targetStart;
+    UTF32 ch, ch2;
+    while (source < sourceEnd) {
+	const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
+	ch = *source++;
+	/* If we have a surrogate pair, convert to UTF32 first. */
+	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+	    /* If the 16 bits following the high surrogate are in the source buffer... */
+	    if (source < sourceEnd) {
+		ch2 = *source;
+		/* If it's a low surrogate, convert to UTF32. */
+		if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+		    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+			+ (ch2 - UNI_SUR_LOW_START) + halfBase;
+		    ++source;
+		} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+		    --source; /* return to the illegal value itself */
+		    result = sourceIllegal;
+		    break;
+		}
+	    } else { /* We don't have the 16 bits following the high surrogate. */
+		--source; /* return to the high surrogate */
+		result = sourceExhausted;
+		break;
+	    }
+	} else if (flags == strictConversion) {
+	    /* UTF-16 surrogate values are illegal in UTF-32 */
+	    if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+		--source; /* return to the illegal value itself */
+		result = sourceIllegal;
+		break;
+	    }
+	}
+	if (target >= targetEnd) {
+	    source = oldSource; /* Back up source pointer! */
+	    result = targetExhausted; break;
+	}
+	*target++ = ch;
+    }
+    *sourceStart = source;
+    *targetStart = target;
+#ifdef CVTUTF_DEBUG
+if (result == sourceIllegal) {
+    fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); /* NOTE(review): ch2 is only assigned after a high surrogate, so it may be unset here; ch/ch2 are UTF32 (unsigned long) but the format uses %x */
+    fflush(stderr);
+}
+#endif
+    return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+		     0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow.  There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/* --------------------------------------------------------------------- */
+
+/* The interface converts a whole buffer to avoid function-call overhead.
+ * Constants have been gathered. Loops & conditionals have been removed as
+ * much as possible for efficiency, in favor of drop-through switches.
+ * (See "Note A" at the bottom of the file for equivalent code.)
+ * If your compiler supports it, the "isLegalUTF8" call can be turned
+ * into an inline function.
+ */
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF16toUTF8
+ *
+ * Converts the UTF-16 units between *sourceStart and sourceEnd to UTF-8,
+ * writing into the buffer between *targetStart and targetEnd. On return
+ * *sourceStart and *targetStart are set to the positions reached.
+ *
+ * Surrogate pairs are combined first. A high surrogate that is the last
+ * unit of the source yields sourceExhausted. In strict mode an unpaired
+ * surrogate yields sourceIllegal; otherwise its value is encoded like any
+ * other value below 0x10000, i.e. as three bytes. If the bytes for one
+ * character do not fit, source is backed up and targetExhausted is returned.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF8 (
+	const UTF16** sourceStart, const UTF16* sourceEnd, 
+	UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+	UTF32 ch;
+	unsigned short bytesToWrite = 0;
+	const UTF32 byteMask = 0xBF; /* clears bit 6 so a trailing byte has the form 10xxxxxx */
+	const UTF32 byteMark = 0x80; /* sets bit 7 of every trailing byte */
+	const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+	ch = *source++;
+	/* If we have a surrogate pair, convert to UTF32 first. */
+	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+	    /* If the 16 bits following the high surrogate are in the source buffer... */
+	    if (source < sourceEnd) {
+		UTF32 ch2 = *source;
+		/* If it's a low surrogate, convert to UTF32. */
+		if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+		    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+			+ (ch2 - UNI_SUR_LOW_START) + halfBase;
+		    ++source;
+		} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+		    --source; /* return to the illegal value itself */
+		    result = sourceIllegal;
+		    break;
+		}
+	    } else { /* We don't have the 16 bits following the high surrogate. */
+		--source; /* return to the high surrogate */
+		result = sourceExhausted;
+		break;
+	    }
+	} else if (flags == strictConversion) {
+	    /* An unpaired low surrogate is illegal input in strict mode */
+	    if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+		--source; /* return to the illegal value itself */
+		result = sourceIllegal;
+		break;
+	    }
+	}
+	/* Figure out how many bytes the result will require */
+	if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
+	} else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
+	} else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
+	} else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
+	} else {			    bytesToWrite = 3;
+					    ch = UNI_REPLACEMENT_CHAR;
+	}
+
+	target += bytesToWrite; /* jump to the end of the sequence; the bytes are stored back to front below */
+	if (target > targetEnd) {
+	    source = oldSource; /* Back up source pointer! */
+	    target -= bytesToWrite; result = targetExhausted; break;
+	}
+	switch (bytesToWrite) { /* note: everything falls through. */
+	    case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+	    case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+	    case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+	    case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]); /* lead byte: remaining bits plus the length marker */
+	}
+	target += bytesToWrite; /* the switch left target at the lead byte; move past the sequence again */
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ *  length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns false.  The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ *
+ * The bytes are examined from the last one back to the first: the outer
+ * switch is entered at 'length' and falls through, so when the inner
+ * switch runs 'a' holds the second byte of the sequence. The function
+ * reads exactly 'length' bytes starting at 'source'; it performs no
+ * bounds check of its own.
+ */
+
+static Boolean isLegalUTF8(const UTF8 *source, int length) {
+    UTF8 a;
+    const UTF8 *srcptr = source+length;
+    switch (length) {
+    default: return false; /* any length other than 1..4 */
+	/* Everything else falls through when "true"... */
+    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+    case 2: if ((a = (*--srcptr)) > 0xBF) return false; /* lower bound of the second byte depends on the lead byte, see below */
+
+	switch (*source) {
+	    /* no fall-through in this inner switch */
+	    case 0xE0: if (a < 0xA0) return false; break; /* would be an overlong 3-byte form */
+	    case 0xED: if (a > 0x9F) return false; break; /* would encode a surrogate, U+D800..U+DFFF */
+	    case 0xF0: if (a < 0x90) return false; break; /* would be an overlong 4-byte form */
+	    case 0xF4: if (a > 0x8F) return false; break; /* would exceed U+10FFFF */
+	    default:   if (a < 0x80) return false; /* ordinary trailing-byte lower bound */
+	}
+
+    case 1: if (*source >= 0x80 && *source < 0xC2) return false; /* trailing byte or overlong lead (0xC0, 0xC1) in lead position */
+    }
+    if (*source > 0xF4) return false; /* lead bytes above 0xF4 would exceed U+10FFFF */
+    return true;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 sequence is legal or not.
+ * This is not used here; it's just exported.
+ *
+ * The sequence length is taken from the first byte; false is returned if
+ * that many bytes are not available before sourceEnd, otherwise the
+ * result of isLegalUTF8() for that one sequence.
+ * NOTE(review): *source is read before any comparison with sourceEnd, so
+ * the caller must guarantee source < sourceEnd.
+ */
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
+    int length = trailingBytesForUTF8[*source]+1;
+    if (source+length > sourceEnd) {
+	return false;
+    }
+    return isLegalUTF8(source, length);
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF8toUTF16
+ *
+ * Converts the UTF-8 bytes between *sourceStart and sourceEnd to UTF-16,
+ * writing into the buffer between *targetStart and targetEnd. On return
+ * *sourceStart and *targetStart are set to the positions reached.
+ *
+ * A sequence cut off by sourceEnd yields sourceExhausted; a sequence
+ * rejected by isLegalUTF8() yields sourceIllegal in either mode. In both
+ * cases source stays at the first byte of that sequence. A full target
+ * yields targetExhausted with source backed up to the start of the
+ * character that did not fit.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF16 (
+	const UTF8** sourceStart, const UTF8* sourceEnd, 
+	UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+	UTF32 ch = 0;
+	unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+	if (source + extraBytesToRead >= sourceEnd) {
+	    result = sourceExhausted; break;
+	}
+	/* Do this check whether lenient or strict */
+	if (! isLegalUTF8(source, extraBytesToRead+1)) {
+	    result = sourceIllegal;
+	    break;
+	}
+	/*
+	 * The cases all fall through. See "Note A" below.
+	 * Cases 5 and 4 cannot be reached: isLegalUTF8() above returns false
+	 * for every length greater than 4.
+	 */
+	switch (extraBytesToRead) {
+	    case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+	    case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+	    case 3: ch += *source++; ch <<= 6;
+	    case 2: ch += *source++; ch <<= 6;
+	    case 1: ch += *source++; ch <<= 6;
+	    case 0: ch += *source++;
+	}
+	ch -= offsetsFromUTF8[extraBytesToRead]; /* removes the accumulated lead/trailing marker bits in one step */
+
+	if (target >= targetEnd) {
+	    source -= (extraBytesToRead+1); /* Back up source pointer! */
+	    result = targetExhausted; break;
+	}
+	if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+	    /* UTF-16 surrogate values are illegal in UTF-32 */
+	    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+		if (flags == strictConversion) {
+		    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+		    result = sourceIllegal;
+		    break;
+		} else {
+		    *target++ = UNI_REPLACEMENT_CHAR;
+		}
+	    } else {
+		*target++ = (UTF16)ch; /* normal case */
+	    }
+	} else if (ch > UNI_MAX_UTF16) {
+	    if (flags == strictConversion) {
+		result = sourceIllegal;
+		source -= (extraBytesToRead+1); /* return to the start */
+		break; /* Bail out; shouldn't continue */
+	    } else {
+		*target++ = UNI_REPLACEMENT_CHAR;
+	    }
+	} else {
+	    /* target is a character in range 0x10000 - 0x10FFFF and needs a surrogate pair (two UTF-16 units). */
+	    if (target + 1 >= targetEnd) {
+		source -= (extraBytesToRead+1); /* Back up source pointer! */
+		result = targetExhausted; break;
+	    }
+	    ch -= halfBase;
+	    *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+	    *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+	}
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF32toUTF8
+ *
+ * Converts the UTF-32 values between *sourceStart and sourceEnd to UTF-8,
+ * writing into the buffer between *targetStart and targetEnd. On return
+ * *sourceStart and *targetStart are set to the positions reached.
+ *
+ * In strict mode a surrogate value stops the conversion with
+ * sourceIllegal; in lenient mode it is encoded as three bytes like any
+ * other value below 0x10000. Values above UNI_MAX_LEGAL_UTF32 are written
+ * as UNI_REPLACEMENT_CHAR and set the result to sourceIllegal in both
+ * modes, but the loop continues. If the bytes for one character do not
+ * fit, source is backed up and targetExhausted is returned.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF8 (
+	const UTF32** sourceStart, const UTF32* sourceEnd, 
+	UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+	UTF32 ch;
+	unsigned short bytesToWrite = 0;
+	const UTF32 byteMask = 0xBF; /* clears bit 6 so a trailing byte has the form 10xxxxxx */
+	const UTF32 byteMark = 0x80; /* sets bit 7 of every trailing byte */
+	ch = *source++;
+	if (flags == strictConversion ) {
+	    /* UTF-16 surrogate values are illegal in UTF-32 */
+	    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+		--source; /* return to the illegal value itself */
+		result = sourceIllegal;
+		break;
+	    }
+	}
+	/*
+	 * Figure out how many bytes the result will require. Turn any
+	 * illegally large UTF32 things (> Plane 17) into replacement chars.
+	 */
+	if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
+	} else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
+	} else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
+	} else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
+	} else {			    bytesToWrite = 3;
+					    ch = UNI_REPLACEMENT_CHAR;
+					    result = sourceIllegal;
+	}
+	
+	target += bytesToWrite; /* jump to the end of the sequence; the bytes are stored back to front below */
+	if (target > targetEnd) {
+	    --source; /* Back up source pointer! */
+	    target -= bytesToWrite; result = targetExhausted; break;
+	}
+	switch (bytesToWrite) { /* note: everything falls through. */
+	    case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+	    case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+	    case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+	    case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); /* lead byte: remaining bits plus the length marker */
+	}
+	target += bytesToWrite; /* the switch left target at the lead byte; move past the sequence again */
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF8toUTF32
+ *
+ * Converts the UTF-8 bytes between *sourceStart and sourceEnd to UTF-32,
+ * writing into the buffer between *targetStart and targetEnd. On return
+ * *sourceStart and *targetStart are set to the positions reached.
+ *
+ * A sequence cut off by sourceEnd yields sourceExhausted; a sequence
+ * rejected by isLegalUTF8() yields sourceIllegal in either mode. In both
+ * cases source stays at the first byte of that sequence. A decoded
+ * surrogate value is an error in strict mode and becomes
+ * UNI_REPLACEMENT_CHAR otherwise. A full target yields targetExhausted
+ * with source backed up to the start of the character that did not fit.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF32 (
+	const UTF8** sourceStart, const UTF8* sourceEnd, 
+	UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF32* target = *targetStart;
+    while (source < sourceEnd) {
+	UTF32 ch = 0;
+	unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+	if (source + extraBytesToRead >= sourceEnd) {
+	    result = sourceExhausted; break;
+	}
+	/* Do this check whether lenient or strict */
+	if (! isLegalUTF8(source, extraBytesToRead+1)) {
+	    result = sourceIllegal;
+	    break;
+	}
+	/*
+	 * The cases all fall through. See "Note A" below.
+	 * Cases 5 and 4 cannot be reached: isLegalUTF8() above returns false
+	 * for every length greater than 4.
+	 */
+	switch (extraBytesToRead) {
+	    case 5: ch += *source++; ch <<= 6;
+	    case 4: ch += *source++; ch <<= 6;
+	    case 3: ch += *source++; ch <<= 6;
+	    case 2: ch += *source++; ch <<= 6;
+	    case 1: ch += *source++; ch <<= 6;
+	    case 0: ch += *source++;
+	}
+	ch -= offsetsFromUTF8[extraBytesToRead]; /* removes the accumulated lead/trailing marker bits in one step */
+
+	if (target >= targetEnd) {
+	    source -= (extraBytesToRead+1); /* Back up the source pointer! */
+	    result = targetExhausted; break;
+	}
+	if (ch <= UNI_MAX_LEGAL_UTF32) {
+	    /*
+	     * UTF-16 surrogate values are illegal in UTF-32, and anything
+	     * over Plane 17 (> 0x10FFFF) is illegal.
+	     */
+	    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+		if (flags == strictConversion) {
+		    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+		    result = sourceIllegal;
+		    break;
+		} else {
+		    *target++ = UNI_REPLACEMENT_CHAR;
+		}
+	    } else {
+		*target++ = ch;
+	    }
+	} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+	    result = sourceIllegal; /* recorded in both modes; the loop does not stop here */
+	    *target++ = UNI_REPLACEMENT_CHAR;
+	}
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+
+    Note A.
+    The fall-through switches in UTF-8 reading code save a
+    temp variable, some decrements & conditionals.  The switches
+    are equivalent to the following loop:
+	{
+	    int tmpBytesToRead = extraBytesToRead+1;
+	    do {
+		ch += *source++;
+		--tmpBytesToRead;
+		if (tmpBytesToRead) ch <<= 6;
+	    } while (tmpBytesToRead > 0);
+	}
+    In UTF-8 writing code, the switches on "bytesToWrite" are
+    similarly unrolled loops.
+
+   --------------------------------------------------------------------- */

+ 149 - 0
contrib/ConvertUTF/ConvertUTF.h

@@ -0,0 +1,149 @@
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8.  Header file.
+
+    Several functions are included here, forming a complete set of
+    conversions between the three formats.  UTF-7 is not included
+    here, but is handled in a separate source file.
+
+    Each of these routines takes pointers to input buffers and output
+    buffers.  The input buffers are const.
+
+    Each routine converts the text between *sourceStart and sourceEnd,
+    putting the result into the buffer between *targetStart and
+    targetEnd. Note: the end pointers are *after* the last item: e.g. 
+    *(sourceEnd - 1) is the last item.
+
+    The return result indicates whether the conversion was successful,
+    and if not, whether the problem was in the source or target buffers.
+    (Only the first encountered problem is indicated.)
+
+    After the conversion, *sourceStart and *targetStart are both
+    updated to point to the end of last text successfully converted in
+    the respective buffers.
+
+    Input parameters:
+	sourceStart - pointer to a pointer to the source buffer.
+		The contents of this are modified on return so that
+		it points at the next thing to be converted.
+	targetStart - similarly, pointer to pointer to the target buffer.
+	sourceEnd, targetEnd - respectively pointers to the ends of the
+		two buffers, for overflow checking only.
+
+    These conversion functions take a ConversionFlags argument. When this
+    flag is set to strict, both irregular sequences and isolated surrogates
+    will cause an error.  When the flag is set to lenient, both irregular
+    sequences and isolated surrogates are converted.
+
+    Whether the flag is strict or lenient, all illegal sequences will cause
+    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
+    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
+    must check for illegal sequences.
+
+    When the flag is set to lenient, characters over 0x10FFFF are converted
+    to the replacement character; otherwise (when the flag is set to strict)
+    they constitute an error.
+
+    Output parameters:
+	The value "sourceIllegal" is returned from some routines if the input
+	sequence is malformed.  When "sourceIllegal" is returned, the source
+	value will point to the illegal value that caused the problem. E.g.,
+	in UTF-8 when a sequence is malformed, it points to the start of the
+	malformed sequence.  
+
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+		 Fixes & updates, Sept 2001.
+
+------------------------------------------------------------------------ */
+
+/* ---------------------------------------------------------------------
+    The following 4 definitions are compiler-specific.
+    The C standard does not guarantee that wchar_t has at least
+    16 bits, so wchar_t is no less portable than unsigned short!
+    All should be unsigned values to avoid sign extension during
+    bit mask & shift operations.
+------------------------------------------------------------------------ */
+
+typedef unsigned long	UTF32;	/* at least 32 bits. NOTE(review): unsigned long is wider than 32 bits on LP64 targets; presumably callers that reinterpret raw 4-byte file data as UTF32 need an exactly-32-bit type there - confirm */
+typedef unsigned short	UTF16;	/* at least 16 bits; holds one UTF-16 code unit */
+typedef unsigned char	UTF8;	/* typically 8 bits; holds one UTF-8 byte */
+typedef unsigned char	Boolean; /* 0 or 1 */
+
+/* Some fundamental constants */
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+typedef enum {
+	conversionOK, 		/* conversion successful */
+	sourceExhausted,	/* partial character in source, but hit end */
+	targetExhausted,	/* insuff. room in target for conversion */
+	sourceIllegal		/* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum {
+	strictConversion = 0,
+	lenientConversion
+} ConversionFlags;
+
+/* This is for C++ and does no harm in C */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ConversionResult ConvertUTF8toUTF16 (
+		const UTF8** sourceStart, const UTF8* sourceEnd, 
+		UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF8 (
+		const UTF16** sourceStart, const UTF16* sourceEnd, 
+		UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+		
+ConversionResult ConvertUTF8toUTF32 (
+		const UTF8** sourceStart, const UTF8* sourceEnd, 
+		UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF8 (
+		const UTF32** sourceStart, const UTF32* sourceEnd, 
+		UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+		
+ConversionResult ConvertUTF16toUTF32 (
+		const UTF16** sourceStart, const UTF16* sourceEnd, 
+		UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF16 (
+		const UTF32** sourceStart, const UTF32* sourceEnd, 
+		UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
+
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
+
+#ifdef __cplusplus
+}
+#endif
+
+/* --------------------------------------------------------------------- */

+ 43 - 0
contrib/ConvertUTF/readme.txt

@@ -0,0 +1,43 @@
+
+The accompanying C source code file "ConvertUTF.c" and the associated header
+file "ConvertUTF.h" provide for conversion between various transformation
+formats of Unicode characters.  The following conversions are supported:
+
+	UTF-32 to UTF-16
+	UTF-32 to UTF-8
+	UTF-16 to UTF-32
+	UTF-16 to UTF-8
+	UTF-8 to UTF-16
+	UTF-8 to UTF-32
+
+In addition, there is a test harness which runs various tests.
+
+The files "CVTUTF7.C" and "CVTUTF7.H" are for archival and historical purposes
+only. They have not been updated to Unicode 3.0 or later and should be
+considered obsolescent. "CVTUTF7.C" contains two functions that can convert
+between UCS2 (i.e., the BMP characters only) and UTF-7. Surrogates are
+not supported, the code has not been tested, and should be considered
+unsuitable for general purpose use.
+
+Please submit any bug reports about these programs here:
+
+	http://www.unicode.org/unicode/reporting.html
+
+Version 1.0: initial version.
+
+Version 1.1: corrected some minor problems; added stricter checks.
+
+Version 1.2: corrected switch statements associated with "extraBytesToRead"
+	in 4 & 5 byte cases, in functions for conversion from UTF8.
+	Note: formally, the 4 & 5 byte cases are illegal in the latest
+	UTF8, but the table and this code have always catered for those
+	cases, since at one time they were legal.
+
+Version 1.3: Updated UTF-8 legality check;
+	updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions
+	Updated UTF-8 legality tests in harness.c
+ 
+
+Last update: October 19, 2004
+
+

+ 24 - 8
include/aiTypes.h

@@ -255,10 +255,24 @@ struct aiColor4D
 #include "./Compiler/poppack1.h"
 
 // ----------------------------------------------------------------------------------
-/** Represents a string, zero byte terminated.
+/** Represents a UTF-8 string, zero byte terminated.
  *
- *  We use this representation to be C-compatible. The length of such a string is
- *  limited to MAXLEN characters (excluding the terminal zero).
+ *  The character set of an aiString is explicitly defined to be UTF-8. This Unicode
+ *  transformation was chosen in the belief that most strings in 3d files are limited
+ *  to the ASCII characters, thus the character set needed to be ASCII compatible.
+ *  
+ *  Most text file loaders provide proper Unicode input file handling: special Unicode
+ *  characters are correctly transcoded to UTF-8 and are kept throughout the library's
+ *  import pipeline.
+ *
+ *  For most applications, it will be absolutely sufficient to interpret the
+ *  aiString as ASCII data and work with it as one would work with a plain char*.
+ *  Windows users in need of proper support for e.g. Asian characters can use the
+ *  #MultiByteToWideChar(), #WideCharToMultiByte() WinAPI functionality to convert the
+ *  UTF-8 strings to their working character set (e.g. MBCS, WideChar).
+ *
+ *  We use this representation instead of std::string to be C-compatible. The
+ *  (binary) length of such a string is limited to MAXLEN bytes (excluding the terminal 0).
 */
 struct aiString
 {
@@ -271,7 +285,7 @@ struct aiString
 
 #ifdef _DEBUG
 		// Debug build: overwrite the string on its full length with ESC (27)
-		::memset(data+1,27,MAXLEN-1);
+		memset(data+1,27,MAXLEN-1);
 #endif
 	}
 
@@ -279,7 +293,7 @@ struct aiString
 	aiString(const aiString& rOther) : 
 		length(rOther.length) 
 	{
-		::memcpy( data, rOther.data, rOther.length);
+		memcpy( data, rOther.data, rOther.length);
 		data[length] = '\0';
 	}
 
@@ -344,7 +358,7 @@ struct aiString
 			return;
 		}
 
-		::memcpy(&data[length],app,len+1);
+		memcpy(&data[length],app,len+1);
 		length += len;
 	}
 
@@ -355,13 +369,15 @@ struct aiString
 
 #ifdef _DEBUG
 		// Debug build: overwrite the string on its full length with ESC (27)
-		::memset(data+1,27,MAXLEN-1);
+		memset(data+1,27,MAXLEN-1);
 #endif
 	}
 
 #endif // !__cplusplus
 
-	/** Length of the string excluding the terminal 0 */
+	/** Binary length of the string excluding the terminal 0. This is NOT the 
+	 *  logical length of strings containing UTF-8 multibyte sequences! It's
+	 *  the number of bytes from the beginning of the string to its end.*/
 	size_t length;
 
 	/** String buffer. Size limit is MAXLEN */

+ 1 - 1
mkutil/revision.h

@@ -1 +1 @@
-#define SVNRevision  433 
+#define SVNRevision  467 

BIN
test/models/AC/SphereWithLight_UTF16LE.ac


+ 1134 - 0
test/models/AC/SphereWithLight_UTF8BOM.ac

@@ -0,0 +1,1134 @@
+AC3Db
+MATERIAL "ac3dmat1" rgb 1 1 1  amb 0.2 0.2 0.2  emis 0 0 0  spec 0.2 0.2 0.2  shi 128  trans 0
+OBJECT world
+kids 2
+OBJECT light
+name "中国菜中国菜2"
+loc 0.000424567 -0.0127304 0
+kids 0
+OBJECT poly
+name "中国菜"
+loc -0.0624103 -0.012381 0.0558408
+texture "./../LWO/LWO2/MappingModes/earthSpherical.jpg"
+crease 45.000000
+numvert 134
+-0.00202139 0.0563461 0
+0.0108348 0.0544951 -0.00722633
+0.00540113 0.0544951 -0.0125164
+-0.00202139 0.0544951 -0.0144527
+-0.0094439 0.0544951 -0.0125164
+-0.0148776 0.0544951 -0.00722633
+-0.0168664 0.0544951 0
+-0.0148776 0.0544951 0.00722633
+-0.00944391 0.0544951 0.0125164
+-0.00202139 0.0544951 0.0144527
+0.00540113 0.0544951 0.0125164
+0.0108348 0.0544951 0.00722633
+0.0128236 0.0544951 0
+0.0228148 0.049068 -0.0139602
+0.0123178 0.049068 -0.0241798
+-0.00202138 0.049068 -0.0279204
+-0.0163606 0.049068 -0.0241798
+-0.0268576 0.049068 -0.0139602
+-0.0306998 0.049068 -3.72529e-009
+-0.0268576 0.049068 0.0139602
+-0.0163606 0.049068 0.0241798
+-0.00202139 0.049068 0.0279204
+0.0123178 0.049068 0.0241798
+0.0228148 0.049068 0.0139602
+0.026657 0.049068 0
+0.0331024 0.0404348 -0.0197427
+0.0182573 0.0404348 -0.0341954
+-0.00202138 0.0404348 -0.0394854
+-0.0223001 0.0404348 -0.0341954
+-0.0371451 0.0404348 -0.0197427
+-0.0425788 0.0404348 -3.72529e-009
+-0.0371451 0.0404348 0.0197427
+-0.0223001 0.0404348 0.0341954
+-0.00202139 0.0404348 0.0394854
+0.0182573 0.0404348 0.0341954
+0.0331024 0.0404348 0.0197427
+0.038536 0.0404348 0
+0.0409962 0.0291838 -0.0241798
+0.0228149 0.0291838 -0.0418806
+-0.00202138 0.0291838 -0.0483595
+-0.0268576 0.0291838 -0.0418806
+-0.045039 0.0291838 -0.0241798
+-0.0516939 0.0291838 -3.72529e-009
+-0.045039 0.0291838 0.0241798
+-0.0268576 0.0291838 0.0418806
+-0.00202139 0.0291838 0.0483595
+0.0228148 0.0291838 0.0418806
+0.0409962 0.0291838 0.0241798
+0.0476511 0.0291838 0
+0.0459585 0.0160817 -0.026969
+0.0256798 0.0160817 -0.0467117
+-0.00202138 0.0160817 -0.0539381
+-0.0297226 0.0160817 -0.0467117
+-0.0500013 0.0160817 -0.026969
+-0.0574238 0.0160817 -3.72529e-009
+-0.0500013 0.0160817 0.026969
+-0.0297226 0.0160817 0.0467117
+-0.00202139 0.0160817 0.0539381
+0.0256798 0.0160817 0.0467117
+0.0459585 0.0160817 0.026969
+0.053381 0.0160817 0
+0.0476511 0.00202139 -0.0279204
+0.026657 0.00202139 -0.0483595
+-0.00202138 0.00202139 -0.0558408
+-0.0306998 0.00202139 -0.0483595
+-0.0516938 0.00202139 -0.0279204
+-0.0593782 0.00202139 -3.72529e-009
+-0.0516939 0.00202139 0.0279204
+-0.0306998 0.00202139 0.0483595
+-0.00202139 0.00202139 0.0558408
+0.026657 0.00202139 0.0483595
+0.0476511 0.00202139 0.0279204
+0.0553354 0.00202139 0
+0.0459585 -0.0120389 -0.026969
+0.0256798 -0.0120389 -0.0467117
+-0.00202138 -0.0120389 -0.0539381
+-0.0297226 -0.0120389 -0.0467117
+-0.0500013 -0.0120389 -0.026969
+-0.0574238 -0.0120389 -3.72529e-009
+-0.0500013 -0.0120389 0.026969
+-0.0297226 -0.0120389 0.0467117
+-0.00202139 -0.0120389 0.0539381
+0.0256798 -0.0120389 0.0467117
+0.0459585 -0.0120389 0.026969
+0.053381 -0.0120389 0
+0.0409962 -0.025141 -0.0241798
+0.0228149 -0.025141 -0.0418806
+-0.00202138 -0.025141 -0.0483595
+-0.0268576 -0.025141 -0.0418806
+-0.045039 -0.025141 -0.0241798
+-0.0516939 -0.025141 -3.72529e-009
+-0.045039 -0.025141 0.0241798
+-0.0268576 -0.025141 0.0418806
+-0.00202139 -0.025141 0.0483595
+0.0228148 -0.025141 0.0418806
+0.0409962 -0.025141 0.0241798
+0.0476511 -0.025141 0
+0.0331024 -0.036392 -0.0197427
+0.0182573 -0.036392 -0.0341954
+-0.00202138 -0.036392 -0.0394854
+-0.0223001 -0.036392 -0.0341954
+-0.0371451 -0.036392 -0.0197427
+-0.0425788 -0.036392 -3.72529e-009
+-0.0371451 -0.036392 0.0197427
+-0.0223001 -0.036392 0.0341954
+-0.00202139 -0.036392 0.0394854
+0.0182573 -0.036392 0.0341954
+0.0331024 -0.036392 0.0197427
+0.038536 -0.036392 0
+0.0228148 -0.0450252 -0.0139602
+0.0123178 -0.0450252 -0.0241798
+-0.00202138 -0.0450252 -0.0279204
+-0.0163606 -0.0450252 -0.0241798
+-0.0268576 -0.0450252 -0.0139602
+-0.0306998 -0.0450252 -3.72529e-009
+-0.0268576 -0.0450252 0.0139602
+-0.0163606 -0.0450252 0.0241798
+-0.00202139 -0.0450252 0.0279204
+0.0123178 -0.0450252 0.0241798
+0.0228148 -0.0450252 0.0139602
+0.026657 -0.0450252 0
+0.0108348 -0.0504523 -0.00722633
+0.00540113 -0.0504523 -0.0125164
+-0.00202139 -0.0504523 -0.0144527
+-0.0094439 -0.0504523 -0.0125164
+-0.0148776 -0.0504523 -0.00722633
+-0.0168664 -0.0504523 0
+-0.0148776 -0.0504523 0.00722633
+-0.00944391 -0.0504523 0.0125164
+-0.00202139 -0.0504523 0.0144527
+0.00540113 -0.0504523 0.0125164
+0.0108348 -0.0504523 0.00722633
+0.0128236 -0.0504523 0
+-0.00202139 -0.0523034 0
+numsurf 144
+SURF 0x10
+mat 0
+refs 4
+119 0.916667 0.166667
+131 0.916667 0.0833333
+132 1 0.0833333
+120 1 0.166667
+SURF 0x10
+mat 0
+refs 4
+118 0.833333 0.166667
+130 0.833333 0.0833333
+131 0.916667 0.0833333
+119 0.916667 0.166667
+SURF 0x10
+mat 0
+refs 4
+117 0.75 0.166667
+129 0.75 0.0833333
+130 0.833333 0.0833333
+118 0.833333 0.166667
+SURF 0x10
+mat 0
+refs 4
+116 0.666667 0.166667
+128 0.666667 0.0833333
+129 0.75 0.0833333
+117 0.75 0.166667
+SURF 0x10
+mat 0
+refs 4
+115 0.583333 0.166667
+127 0.583333 0.0833333
+128 0.666667 0.0833333
+116 0.666667 0.166667
+SURF 0x10
+mat 0
+refs 4
+114 0.5 0.166667
+126 0.5 0.0833333
+127 0.583333 0.0833333
+115 0.583333 0.166667
+SURF 0x10
+mat 0
+refs 4
+113 0.416667 0.166667
+125 0.416667 0.0833333
+126 0.5 0.0833333
+114 0.5 0.166667
+SURF 0x10
+mat 0
+refs 4
+112 0.333333 0.166667
+124 0.333333 0.0833333
+125 0.416667 0.0833333
+113 0.416667 0.166667
+SURF 0x10
+mat 0
+refs 4
+111 0.25 0.166667
+123 0.25 0.0833333
+124 0.333333 0.0833333
+112 0.333333 0.166667
+SURF 0x10
+mat 0
+refs 4
+110 0.166667 0.166667
+122 0.166667 0.0833333
+123 0.25 0.0833333
+111 0.25 0.166667
+SURF 0x10
+mat 0
+refs 4
+109 0.0833333 0.166667
+121 0.0833333 0.0833333
+122 0.166667 0.0833333
+110 0.166667 0.166667
+SURF 0x10
+mat 0
+refs 4
+120 -2.98023e-008 0.166667
+132 -2.98023e-008 0.0833333
+121 0.0833333 0.0833333
+109 0.0833333 0.166667
+SURF 0x10
+mat 0
+refs 4
+107 0.916667 0.25
+119 0.916667 0.166667
+120 1 0.166667
+108 1 0.25
+SURF 0x10
+mat 0
+refs 4
+106 0.833333 0.25
+118 0.833333 0.166667
+119 0.916667 0.166667
+107 0.916667 0.25
+SURF 0x10
+mat 0
+refs 4
+105 0.75 0.25
+117 0.75 0.166667
+118 0.833333 0.166667
+106 0.833333 0.25
+SURF 0x10
+mat 0
+refs 4
+104 0.666667 0.25
+116 0.666667 0.166667
+117 0.75 0.166667
+105 0.75 0.25
+SURF 0x10
+mat 0
+refs 4
+103 0.583333 0.25
+115 0.583333 0.166667
+116 0.666667 0.166667
+104 0.666667 0.25
+SURF 0x10
+mat 0
+refs 4
+102 0.5 0.25
+114 0.5 0.166667
+115 0.583333 0.166667
+103 0.583333 0.25
+SURF 0x10
+mat 0
+refs 4
+101 0.416667 0.25
+113 0.416667 0.166667
+114 0.5 0.166667
+102 0.5 0.25
+SURF 0x10
+mat 0
+refs 4
+100 0.333333 0.25
+112 0.333333 0.166667
+113 0.416667 0.166667
+101 0.416667 0.25
+SURF 0x10
+mat 0
+refs 4
+99 0.25 0.25
+111 0.25 0.166667
+112 0.333333 0.166667
+100 0.333333 0.25
+SURF 0x10
+mat 0
+refs 4
+98 0.166667 0.25
+110 0.166667 0.166667
+111 0.25 0.166667
+99 0.25 0.25
+SURF 0x10
+mat 0
+refs 4
+97 0.0833333 0.25
+109 0.0833333 0.166667
+110 0.166667 0.166667
+98 0.166667 0.25
+SURF 0x10
+mat 0
+refs 4
+108 -2.98023e-008 0.25
+120 -2.98023e-008 0.166667
+109 0.0833333 0.166667
+97 0.0833333 0.25
+SURF 0x10
+mat 0
+refs 4
+95 0.916667 0.333333
+107 0.916667 0.25
+108 1 0.25
+96 1 0.333333
+SURF 0x10
+mat 0
+refs 4
+94 0.833333 0.333333
+106 0.833333 0.25
+107 0.916667 0.25
+95 0.916667 0.333333
+SURF 0x10
+mat 0
+refs 4
+93 0.75 0.333333
+105 0.75 0.25
+106 0.833333 0.25
+94 0.833333 0.333333
+SURF 0x10
+mat 0
+refs 4
+92 0.666667 0.333333
+104 0.666667 0.25
+105 0.75 0.25
+93 0.75 0.333333
+SURF 0x10
+mat 0
+refs 4
+91 0.583333 0.333333
+103 0.583333 0.25
+104 0.666667 0.25
+92 0.666667 0.333333
+SURF 0x10
+mat 0
+refs 4
+90 0.5 0.333333
+102 0.5 0.25
+103 0.583333 0.25
+91 0.583333 0.333333
+SURF 0x10
+mat 0
+refs 4
+89 0.416667 0.333333
+101 0.416667 0.25
+102 0.5 0.25
+90 0.5 0.333333
+SURF 0x10
+mat 0
+refs 4
+88 0.333333 0.333333
+100 0.333333 0.25
+101 0.416667 0.25
+89 0.416667 0.333333
+SURF 0x10
+mat 0
+refs 4
+87 0.25 0.333333
+99 0.25 0.25
+100 0.333333 0.25
+88 0.333333 0.333333
+SURF 0x10
+mat 0
+refs 4
+86 0.166667 0.333333
+98 0.166667 0.25
+99 0.25 0.25
+87 0.25 0.333333
+SURF 0x10
+mat 0
+refs 4
+85 0.0833333 0.333333
+97 0.0833333 0.25
+98 0.166667 0.25
+86 0.166667 0.333333
+SURF 0x10
+mat 0
+refs 4
+96 -2.98023e-008 0.333333
+108 -2.98023e-008 0.25
+97 0.0833333 0.25
+85 0.0833333 0.333333
+SURF 0x10
+mat 0
+refs 4
+83 0.916667 0.416667
+95 0.916667 0.333333
+96 1 0.333333
+84 1 0.416667
+SURF 0x10
+mat 0
+refs 4
+82 0.833333 0.416667
+94 0.833333 0.333333
+95 0.916667 0.333333
+83 0.916667 0.416667
+SURF 0x10
+mat 0
+refs 4
+81 0.75 0.416667
+93 0.75 0.333333
+94 0.833333 0.333333
+82 0.833333 0.416667
+SURF 0x10
+mat 0
+refs 4
+80 0.666667 0.416667
+92 0.666667 0.333333
+93 0.75 0.333333
+81 0.75 0.416667
+SURF 0x10
+mat 0
+refs 4
+79 0.583333 0.416667
+91 0.583333 0.333333
+92 0.666667 0.333333
+80 0.666667 0.416667
+SURF 0x10
+mat 0
+refs 4
+78 0.5 0.416667
+90 0.5 0.333333
+91 0.583333 0.333333
+79 0.583333 0.416667
+SURF 0x10
+mat 0
+refs 4
+77 0.416667 0.416667
+89 0.416667 0.333333
+90 0.5 0.333333
+78 0.5 0.416667
+SURF 0x10
+mat 0
+refs 4
+76 0.333333 0.416667
+88 0.333333 0.333333
+89 0.416667 0.333333
+77 0.416667 0.416667
+SURF 0x10
+mat 0
+refs 4
+75 0.25 0.416667
+87 0.25 0.333333
+88 0.333333 0.333333
+76 0.333333 0.416667
+SURF 0x10
+mat 0
+refs 4
+74 0.166667 0.416667
+86 0.166667 0.333333
+87 0.25 0.333333
+75 0.25 0.416667
+SURF 0x10
+mat 0
+refs 4
+73 0.0833333 0.416667
+85 0.0833333 0.333333
+86 0.166667 0.333333
+74 0.166667 0.416667
+SURF 0x10
+mat 0
+refs 4
+84 -2.98023e-008 0.416667
+96 -2.98023e-008 0.333333
+85 0.0833333 0.333333
+73 0.0833333 0.416667
+SURF 0x10
+mat 0
+refs 4
+71 0.916667 0.5
+83 0.916667 0.416667
+84 1 0.416667
+72 1 0.5
+SURF 0x10
+mat 0
+refs 4
+70 0.833333 0.5
+82 0.833333 0.416667
+83 0.916667 0.416667
+71 0.916667 0.5
+SURF 0x10
+mat 0
+refs 4
+69 0.75 0.5
+81 0.75 0.416667
+82 0.833333 0.416667
+70 0.833333 0.5
+SURF 0x10
+mat 0
+refs 4
+68 0.666667 0.5
+80 0.666667 0.416667
+81 0.75 0.416667
+69 0.75 0.5
+SURF 0x10
+mat 0
+refs 4
+67 0.583333 0.5
+79 0.583333 0.416667
+80 0.666667 0.416667
+68 0.666667 0.5
+SURF 0x10
+mat 0
+refs 4
+66 0.5 0.5
+78 0.5 0.416667
+79 0.583333 0.416667
+67 0.583333 0.5
+SURF 0x10
+mat 0
+refs 4
+65 0.416667 0.5
+77 0.416667 0.416667
+78 0.5 0.416667
+66 0.5 0.5
+SURF 0x10
+mat 0
+refs 4
+64 0.333333 0.5
+76 0.333333 0.416667
+77 0.416667 0.416667
+65 0.416667 0.5
+SURF 0x10
+mat 0
+refs 4
+63 0.25 0.5
+75 0.25 0.416667
+76 0.333333 0.416667
+64 0.333333 0.5
+SURF 0x10
+mat 0
+refs 4
+62 0.166667 0.5
+74 0.166667 0.416667
+75 0.25 0.416667
+63 0.25 0.5
+SURF 0x10
+mat 0
+refs 4
+61 0.0833333 0.5
+73 0.0833333 0.416667
+74 0.166667 0.416667
+62 0.166667 0.5
+SURF 0x10
+mat 0
+refs 4
+72 -2.98023e-008 0.5
+84 -2.98023e-008 0.416667
+73 0.0833333 0.416667
+61 0.0833333 0.5
+SURF 0x10
+mat 0
+refs 4
+59 0.916667 0.583333
+71 0.916667 0.5
+72 1 0.5
+60 1 0.583333
+SURF 0x10
+mat 0
+refs 4
+58 0.833333 0.583333
+70 0.833333 0.5
+71 0.916667 0.5
+59 0.916667 0.583333
+SURF 0x10
+mat 0
+refs 4
+57 0.75 0.583333
+69 0.75 0.5
+70 0.833333 0.5
+58 0.833333 0.583333
+SURF 0x10
+mat 0
+refs 4
+56 0.666667 0.583333
+68 0.666667 0.5
+69 0.75 0.5
+57 0.75 0.583333
+SURF 0x10
+mat 0
+refs 4
+55 0.583333 0.583333
+67 0.583333 0.5
+68 0.666667 0.5
+56 0.666667 0.583333
+SURF 0x10
+mat 0
+refs 4
+54 0.5 0.583333
+66 0.5 0.5
+67 0.583333 0.5
+55 0.583333 0.583333
+SURF 0x10
+mat 0
+refs 4
+53 0.416667 0.583333
+65 0.416667 0.5
+66 0.5 0.5
+54 0.5 0.583333
+SURF 0x10
+mat 0
+refs 4
+52 0.333333 0.583333
+64 0.333333 0.5
+65 0.416667 0.5
+53 0.416667 0.583333
+SURF 0x10
+mat 0
+refs 4
+51 0.25 0.583333
+63 0.25 0.5
+64 0.333333 0.5
+52 0.333333 0.583333
+SURF 0x10
+mat 0
+refs 4
+50 0.166667 0.583333
+62 0.166667 0.5
+63 0.25 0.5
+51 0.25 0.583333
+SURF 0x10
+mat 0
+refs 4
+49 0.0833333 0.583333
+61 0.0833333 0.5
+62 0.166667 0.5
+50 0.166667 0.583333
+SURF 0x10
+mat 0
+refs 4
+60 -2.98023e-008 0.583333
+72 -2.98023e-008 0.5
+61 0.0833333 0.5
+49 0.0833333 0.583333
+SURF 0x10
+mat 0
+refs 4
+47 0.916667 0.666667
+59 0.916667 0.583333
+60 1 0.583333
+48 1 0.666667
+SURF 0x10
+mat 0
+refs 4
+46 0.833333 0.666667
+58 0.833333 0.583333
+59 0.916667 0.583333
+47 0.916667 0.666667
+SURF 0x10
+mat 0
+refs 4
+45 0.75 0.666667
+57 0.75 0.583333
+58 0.833333 0.583333
+46 0.833333 0.666667
+SURF 0x10
+mat 0
+refs 4
+44 0.666667 0.666667
+56 0.666667 0.583333
+57 0.75 0.583333
+45 0.75 0.666667
+SURF 0x10
+mat 0
+refs 4
+43 0.583333 0.666667
+55 0.583333 0.583333
+56 0.666667 0.583333
+44 0.666667 0.666667
+SURF 0x10
+mat 0
+refs 4
+42 0.5 0.666667
+54 0.5 0.583333
+55 0.583333 0.583333
+43 0.583333 0.666667
+SURF 0x10
+mat 0
+refs 4
+41 0.416667 0.666667
+53 0.416667 0.583333
+54 0.5 0.583333
+42 0.5 0.666667
+SURF 0x10
+mat 0
+refs 4
+40 0.333333 0.666667
+52 0.333333 0.583333
+53 0.416667 0.583333
+41 0.416667 0.666667
+SURF 0x10
+mat 0
+refs 4
+39 0.25 0.666667
+51 0.25 0.583333
+52 0.333333 0.583333
+40 0.333333 0.666667
+SURF 0x10
+mat 0
+refs 4
+38 0.166667 0.666667
+50 0.166667 0.583333
+51 0.25 0.583333
+39 0.25 0.666667
+SURF 0x10
+mat 0
+refs 4
+37 0.0833333 0.666667
+49 0.0833333 0.583333
+50 0.166667 0.583333
+38 0.166667 0.666667
+SURF 0x10
+mat 0
+refs 4
+48 -2.98023e-008 0.666667
+60 -2.98023e-008 0.583333
+49 0.0833333 0.583333
+37 0.0833333 0.666667
+SURF 0x10
+mat 0
+refs 4
+35 0.916667 0.75
+47 0.916667 0.666667
+48 1 0.666667
+36 1 0.75
+SURF 0x10
+mat 0
+refs 4
+34 0.833333 0.75
+46 0.833333 0.666667
+47 0.916667 0.666667
+35 0.916667 0.75
+SURF 0x10
+mat 0
+refs 4
+33 0.75 0.75
+45 0.75 0.666667
+46 0.833333 0.666667
+34 0.833333 0.75
+SURF 0x10
+mat 0
+refs 4
+32 0.666667 0.75
+44 0.666667 0.666667
+45 0.75 0.666667
+33 0.75 0.75
+SURF 0x10
+mat 0
+refs 4
+31 0.583333 0.75
+43 0.583333 0.666667
+44 0.666667 0.666667
+32 0.666667 0.75
+SURF 0x10
+mat 0
+refs 4
+30 0.5 0.75
+42 0.5 0.666667
+43 0.583333 0.666667
+31 0.583333 0.75
+SURF 0x10
+mat 0
+refs 4
+29 0.416667 0.75
+41 0.416667 0.666667
+42 0.5 0.666667
+30 0.5 0.75
+SURF 0x10
+mat 0
+refs 4
+28 0.333333 0.75
+40 0.333333 0.666667
+41 0.416667 0.666667
+29 0.416667 0.75
+SURF 0x10
+mat 0
+refs 4
+27 0.25 0.75
+39 0.25 0.666667
+40 0.333333 0.666667
+28 0.333333 0.75
+SURF 0x10
+mat 0
+refs 4
+26 0.166667 0.75
+38 0.166667 0.666667
+39 0.25 0.666667
+27 0.25 0.75
+SURF 0x10
+mat 0
+refs 4
+25 0.0833333 0.75
+37 0.0833333 0.666667
+38 0.166667 0.666667
+26 0.166667 0.75
+SURF 0x10
+mat 0
+refs 4
+36 -2.98023e-008 0.75
+48 -2.98023e-008 0.666667
+37 0.0833333 0.666667
+25 0.0833333 0.75
+SURF 0x10
+mat 0
+refs 4
+23 0.916667 0.833333
+35 0.916667 0.75
+36 1 0.75
+24 1 0.833333
+SURF 0x10
+mat 0
+refs 4
+22 0.833333 0.833333
+34 0.833333 0.75
+35 0.916667 0.75
+23 0.916667 0.833333
+SURF 0x10
+mat 0
+refs 4
+21 0.75 0.833333
+33 0.75 0.75
+34 0.833333 0.75
+22 0.833333 0.833333
+SURF 0x10
+mat 0
+refs 4
+20 0.666667 0.833333
+32 0.666667 0.75
+33 0.75 0.75
+21 0.75 0.833333
+SURF 0x10
+mat 0
+refs 4
+19 0.583333 0.833333
+31 0.583333 0.75
+32 0.666667 0.75
+20 0.666667 0.833333
+SURF 0x10
+mat 0
+refs 4
+18 0.5 0.833333
+30 0.5 0.75
+31 0.583333 0.75
+19 0.583333 0.833333
+SURF 0x10
+mat 0
+refs 4
+17 0.416667 0.833333
+29 0.416667 0.75
+30 0.5 0.75
+18 0.5 0.833333
+SURF 0x10
+mat 0
+refs 4
+16 0.333333 0.833333
+28 0.333333 0.75
+29 0.416667 0.75
+17 0.416667 0.833333
+SURF 0x10
+mat 0
+refs 4
+15 0.25 0.833333
+27 0.25 0.75
+28 0.333333 0.75
+16 0.333333 0.833333
+SURF 0x10
+mat 0
+refs 4
+14 0.166667 0.833333
+26 0.166667 0.75
+27 0.25 0.75
+15 0.25 0.833333
+SURF 0x10
+mat 0
+refs 4
+13 0.0833333 0.833333
+25 0.0833333 0.75
+26 0.166667 0.75
+14 0.166667 0.833333
+SURF 0x10
+mat 0
+refs 4
+24 -2.98023e-008 0.833333
+36 -2.98023e-008 0.75
+25 0.0833333 0.75
+13 0.0833333 0.833333
+SURF 0x10
+mat 0
+refs 4
+11 0.916667 0.916667
+23 0.916667 0.833333
+24 1 0.833333
+12 1 0.916667
+SURF 0x10
+mat 0
+refs 4
+10 0.833333 0.916667
+22 0.833333 0.833333
+23 0.916667 0.833333
+11 0.916667 0.916667
+SURF 0x10
+mat 0
+refs 4
+9 0.75 0.916667
+21 0.75 0.833333
+22 0.833333 0.833333
+10 0.833333 0.916667
+SURF 0x10
+mat 0
+refs 4
+8 0.666667 0.916667
+20 0.666667 0.833333
+21 0.75 0.833333
+9 0.75 0.916667
+SURF 0x10
+mat 0
+refs 4
+7 0.583333 0.916667
+19 0.583333 0.833333
+20 0.666667 0.833333
+8 0.666667 0.916667
+SURF 0x10
+mat 0
+refs 4
+6 0.5 0.916667
+18 0.5 0.833333
+19 0.583333 0.833333
+7 0.583333 0.916667
+SURF 0x10
+mat 0
+refs 4
+5 0.416667 0.916667
+17 0.416667 0.833333
+18 0.5 0.833333
+6 0.5 0.916667
+SURF 0x10
+mat 0
+refs 4
+4 0.333333 0.916667
+16 0.333333 0.833333
+17 0.416667 0.833333
+5 0.416667 0.916667
+SURF 0x10
+mat 0
+refs 4
+3 0.25 0.916667
+15 0.25 0.833333
+16 0.333333 0.833333
+4 0.333333 0.916667
+SURF 0x10
+mat 0
+refs 4
+2 0.166667 0.916667
+14 0.166667 0.833333
+15 0.25 0.833333
+3 0.25 0.916667
+SURF 0x10
+mat 0
+refs 4
+1 0.0833333 0.916667
+13 0.0833333 0.833333
+14 0.166667 0.833333
+2 0.166667 0.916667
+SURF 0x10
+mat 0
+refs 4
+12 -2.98023e-008 0.916667
+24 -2.98023e-008 0.833333
+13 0.0833333 0.833333
+1 0.0833333 0.916667
+SURF 0x10
+mat 0
+refs 3
+1 0.0833333 0.916667
+0 0.0416666 1
+12 -2.98023e-008 0.916667
+SURF 0x10
+mat 0
+refs 3
+2 0.166667 0.916667
+0 0.125 1
+1 0.0833333 0.916667
+SURF 0x10
+mat 0
+refs 3
+3 0.25 0.916667
+0 0.208333 1
+2 0.166667 0.916667
+SURF 0x10
+mat 0
+refs 3
+4 0.333333 0.916667
+0 0.291667 1
+3 0.25 0.916667
+SURF 0x10
+mat 0
+refs 3
+5 0.416667 0.916667
+0 0.375 1
+4 0.333333 0.916667
+SURF 0x10
+mat 0
+refs 3
+6 0.5 0.916667
+0 0.458333 1
+5 0.416667 0.916667
+SURF 0x10
+mat 0
+refs 3
+7 0.583333 0.916667
+0 0.541667 1
+6 0.5 0.916667
+SURF 0x10
+mat 0
+refs 3
+8 0.666667 0.916667
+0 0.625 1
+7 0.583333 0.916667
+SURF 0x10
+mat 0
+refs 3
+9 0.75 0.916667
+0 0.708333 1
+8 0.666667 0.916667
+SURF 0x10
+mat 0
+refs 3
+10 0.833333 0.916667
+0 0.791667 1
+9 0.75 0.916667
+SURF 0x10
+mat 0
+refs 3
+11 0.916667 0.916667
+0 0.875 1
+10 0.833333 0.916667
+SURF 0x10
+mat 0
+refs 3
+12 1 0.916667
+0 0.958333 1
+11 0.916667 0.916667
+SURF 0x10
+mat 0
+refs 3
+132 -2.98023e-008 0.0833333
+133 0.0416666 0
+121 0.0833333 0.0833333
+SURF 0x10
+mat 0
+refs 3
+121 0.0833333 0.0833333
+133 0.125 0
+122 0.166667 0.0833333
+SURF 0x10
+mat 0
+refs 3
+122 0.166667 0.0833333
+133 0.208333 0
+123 0.25 0.0833333
+SURF 0x10
+mat 0
+refs 3
+123 0.25 0.0833333
+133 0.291667 0
+124 0.333333 0.0833333
+SURF 0x10
+mat 0
+refs 3
+124 0.333333 0.0833333
+133 0.375 0
+125 0.416667 0.0833333
+SURF 0x10
+mat 0
+refs 3
+125 0.416667 0.0833333
+133 0.458333 0
+126 0.5 0.0833333
+SURF 0x10
+mat 0
+refs 3
+126 0.5 0.0833333
+133 0.541667 0
+127 0.583333 0.0833333
+SURF 0x10
+mat 0
+refs 3
+127 0.583333 0.0833333
+133 0.625 0
+128 0.666667 0.0833333
+SURF 0x10
+mat 0
+refs 3
+128 0.666667 0.0833333
+133 0.708333 0
+129 0.75 0.0833333
+SURF 0x10
+mat 0
+refs 3
+129 0.75 0.0833333
+133 0.791667 0
+130 0.833333 0.0833333
+SURF 0x10
+mat 0
+refs 3
+130 0.833333 0.0833333
+133 0.875 0
+131 0.916667 0.0833333
+SURF 0x10
+mat 0
+refs 3
+131 0.916667 0.0833333
+133 0.958333 0
+132 1 0.0833333
+kids 0

BIN
test/models/ASE/ThreeCubesGreen_UTF16BE.ASE


BIN
test/models/ASE/ThreeCubesGreen_UTF16LE.ASE


BIN
test/models/Collada/cube_UTF16LE.dae


+ 210 - 0
test/models/Collada/cube_UTF8BOM.dae

@@ -0,0 +1,210 @@
+<?xml version="1.0"?>
+<COLLADA xmlns="http://www.collada.org/2005/11/COLLADASchema" version="1.4.1">
+    <asset>
+        <contributor>
+            <author>alorino</author>
+            <authoring_tool>Maya 7.0 | ColladaMaya v2.01 Jun  9 2006 at 16:08:19 | FCollada v1.11</authoring_tool>
+            <comments>Collada Maya Export Options: bakeTransforms=0;exportPolygonMeshes=1;bakeLighting=0;isSampling=0;
+curveConstrainSampling=0;exportCameraAsLookat=0;
+exportLights=1;exportCameras=1;exportJointsAndSkin=1;
+exportAnimations=1;exportTriangles=0;exportInvisibleNodes=0;
+exportNormals=1;exportTexCoords=1;exportVertexColors=1;exportTangents=0;
+exportTexTangents=0;exportConstraints=0;exportPhysics=0;exportXRefs=1;
+dereferenceXRefs=0;cameraXFov=0;cameraYFov=1</comments>
+            <copyright>
+Copyright 2006 Sony Computer Entertainment Inc.
+Licensed under the SCEA Shared Source License, Version 1.0 (the
+&quot;License&quot;); you may not use this file except in compliance with the
+License. You may obtain a copy of the License at:
+http://research.scea.com/scea_shared_source_license.html 
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an &quot;AS IS&quot; BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+</copyright>
+        </contributor>
+        <created>2006-06-21T21:23:22Z</created>
+        <modified>2006-06-21T21:23:22Z</modified>
+        <unit meter="0.01" name="centimeter"/>
+        <up_axis>Y_UP</up_axis>
+    </asset>
+    <library_cameras>
+        <camera id="PerspCamera" name="PerspCamera">
+            <optics>
+                <technique_common>
+                    <perspective>
+                        <yfov>37.8493</yfov>
+                        <aspect_ratio>1</aspect_ratio>
+                        <znear>10</znear>
+                        <zfar>1000</zfar>
+                    </perspective>
+                </technique_common>
+            </optics>
+        </camera>
+        <camera id="testCameraShape" name="testCameraShape">
+            <optics>
+                <technique_common>
+                    <perspective>
+                        <yfov>37.8501</yfov>
+                        <aspect_ratio>1</aspect_ratio>
+                        <znear>0.01</znear>
+                        <zfar>1000</zfar>
+                    </perspective>
+                </technique_common>
+            </optics>
+        </camera>
+    </library_cameras>
+    <library_lights>
+        <light id="light-lib" name="light">
+            <technique_common>
+                <point>
+                    <color>1 1 1</color>
+                    <constant_attenuation>1</constant_attenuation>
+                    <linear_attenuation>0</linear_attenuation>
+                    <quadratic_attenuation>0</quadratic_attenuation>
+                </point>
+            </technique_common>
+            <technique profile="MAX3D">
+                <intensity>1.000000</intensity>
+            </technique>
+        </light>
+        <light id="pointLightShape1-lib" name="pointLightShape1">
+            <technique_common>
+                <point>
+                    <color>1 1 1</color>
+                    <constant_attenuation>1</constant_attenuation>
+                    <linear_attenuation>0</linear_attenuation>
+                    <quadratic_attenuation>0</quadratic_attenuation>
+                </point>
+            </technique_common>
+        </light>
+    </library_lights>
+    <library_materials>
+        <material id="Blue" name="Blue">
+            <instance_effect url="#Blue-fx"/>
+        </material>
+    </library_materials>
+    <library_effects>
+        <effect id="Blue-fx">
+            <profile_COMMON>
+                <technique sid="common">
+                    <phong>
+                        <emission>
+                            <color>0 0 0 1</color>
+                        </emission>
+                        <ambient>
+                            <color>0 0 0 1</color>
+                        </ambient>
+                        <diffuse>
+                            <color>0.137255 0.403922 0.870588 1</color>
+                        </diffuse>
+                        <specular>
+                            <color>0.5 0.5 0.5 1</color>
+                        </specular>
+                        <shininess>
+                            <float>16</float>
+                        </shininess>
+                        <reflective>
+                            <color>0 0 0 1</color>
+                        </reflective>
+                        <reflectivity>
+                            <float>0.5</float>
+                        </reflectivity>
+                        <transparent>
+                            <color>0 0 0 1</color>
+                        </transparent>
+                        <transparency>
+                            <float>1</float>
+                        </transparency>
+                        <index_of_refraction>
+                            <float>0</float>
+                        </index_of_refraction>
+                    </phong>
+                </technique>
+            </profile_COMMON>
+        </effect>
+    </library_effects>
+    <library_geometries>
+        <geometry id="box-lib" name="box">
+            <mesh>
+                <source id="box-lib-positions" name="position">
+                    <float_array id="box-lib-positions-array" count="24">-50 50 50 50 50 50 -50 -50 50 50 -50 50 -50 50 -50 50 50 -50 -50 -50 -50 50 -50 -50</float_array>
+                    <technique_common>
+                        <accessor count="8" offset="0" source="#box-lib-positions-array" stride="3">
+                            <param name="X" type="float"></param>
+                            <param name="Y" type="float"></param>
+                            <param name="Z" type="float"></param>
+                        </accessor>
+                    </technique_common>
+                </source>
+                <source id="box-lib-normals" name="normal">
+                    <float_array id="box-lib-normals-array" count="72">0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 -1 0 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1 0 0 -1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 -1 0 0 -1 0 0 -1 0 0 -1</float_array>
+                    <technique_common>
+                        <accessor count="24" offset="0" source="#box-lib-normals-array" stride="3">
+                            <param name="X" type="float"></param>
+                            <param name="Y" type="float"></param>
+                            <param name="Z" type="float"></param>
+                        </accessor>
+                    </technique_common>
+                </source>
+                <vertices id="box-lib-vertices">
+                    <input semantic="POSITION" source="#box-lib-positions"/>
+                </vertices>
+                <polylist count="6" material="BlueSG">
+                    <input offset="0" semantic="VERTEX" source="#box-lib-vertices"/>
+                    <input offset="1" semantic="NORMAL" source="#box-lib-normals"/>
+                    <vcount>4 4 4 4 4 4</vcount>
+                    <p>0 0 2 1 3 2 1 3 0 4 1 5 5 6 4 7 6 8 7 9 3 10 2 11 0 12 4 13 6 14 2 15 3 16 7 17 5 18 1 19 5 20 7 21 6 22 4 23</p>
+                </polylist>
+            </mesh>
+        </geometry>
+    </library_geometries>
+    <library_visual_scenes>
+        <visual_scene id="VisualSceneNode" name="untitled">
+            <node id="Camera" name="Camera">
+                <translate sid="translate">-427.749 333.855 655.017</translate>
+                <rotate sid="rotateY">0 1 0 -33</rotate>
+                <rotate sid="rotateX">1 0 0 -22.1954</rotate>
+                <rotate sid="rotateZ">0 0 1 0</rotate>
+                <instance_camera url="#PerspCamera"/>
+            </node>
+            <node id="Light" name="カタカナ">
+                <translate sid="translate">-500 1000 400</translate>
+                <rotate sid="rotateZ">0 0 1 0</rotate>
+                <rotate sid="rotateY">0 1 0 0</rotate>
+                <rotate sid="rotateX">1 0 0 0</rotate>
+                <instance_light url="#light-lib"/>
+            </node>
+            <node id="Box" name="日本語">
+                <rotate sid="rotateZ">0 0 1 0</rotate>
+                <rotate sid="rotateY">0 1 0 0</rotate>
+                <rotate sid="rotateX">1 0 0 0</rotate>
+                <instance_geometry url="#box-lib">
+                    <bind_material>
+                        <technique_common>
+                            <instance_material symbol="BlueSG" target="#Blue"/>
+                        </technique_common>
+                    </bind_material>
+                </instance_geometry>
+            </node>
+            <node id="testCamera" name="testCamera">
+                <translate sid="translate">-427.749 333.855 655.017</translate>
+                <rotate sid="rotateY">0 1 0 -33</rotate>
+                <rotate sid="rotateX">1 0 0 -22.1954</rotate>
+                <rotate sid="rotateZ">0 0 1 0</rotate>
+                <instance_camera url="#testCameraShape"/>
+            </node>
+            <node id="pointLight1" name="漢字">
+                <translate sid="translate">3 4 10</translate>
+                <rotate sid="rotateZ">0 0 1 0</rotate>
+                <rotate sid="rotateY">0 1 0 0</rotate>
+                <rotate sid="rotateX">1 0 0 0</rotate>
+                <instance_light url="#pointLightShape1-lib"/>
+            </node>
+        </visual_scene>
+    </library_visual_scenes>
+    <scene>
+        <instance_visual_scene url="#VisualSceneNode"/>
+    </scene>
+</COLLADA>

BIN
test/models/OBJ/box_UTF16BE.obj


+ 19 - 10
tools/assimp_view/Display.cpp

@@ -170,10 +170,15 @@ int CDisplay::AddNodeToDisplayList(
 	}
 	else strcpy(chTemp,pcNode->mName.data);
 
-	TVITEMEX tvi; 
-	TVINSERTSTRUCT sNew;
-	tvi.pszText = chTemp;
-	tvi.cchTextMax = (int)strlen(chTemp);
+	TVITEMEXW tvi; 
+	TVINSERTSTRUCTW sNew;
+	
+	wchar_t tmp[512];
+	int t = MultiByteToWideChar(CP_UTF8,0,chTemp,-1,tmp,512);
+	
+	tvi.pszText = tmp;
+	tvi.cchTextMax = (int)t;
+
 	tvi.mask = TVIF_TEXT | TVIF_SELECTEDIMAGE | TVIF_IMAGE | TVIF_HANDLE | TVIF_PARAM;
 	tvi.iImage = this->m_aiImageList[AI_VIEW_IMGLIST_NODE];
 	tvi.iSelectedImage = this->m_aiImageList[AI_VIEW_IMGLIST_NODE];
@@ -185,7 +190,7 @@ int CDisplay::AddNodeToDisplayList(
 
 	// add the item to the list
 	HTREEITEM hTexture = (HTREEITEM)SendMessage(GetDlgItem(g_hDlg,IDC_TREE1), 
-		TVM_INSERTITEM, 
+		TVM_INSERTITEMW, 
 		0,
 		(LPARAM)(LPTVINSERTSTRUCT)&sNew);
 
@@ -511,10 +516,14 @@ int CDisplay::AddMaterialToDisplayList(HTREEITEM hRoot,
 	{
 		sprintf(chTemp,"%s (%i)",szOut.data,iIndex+1);
 	}
-	TVITEMEX tvi; 
-	TVINSERTSTRUCT sNew;
-	tvi.pszText = chTemp;
-	tvi.cchTextMax = (int)strlen(chTemp);
+	TVITEMEXW tvi; 
+	TVINSERTSTRUCTW sNew;
+
+	wchar_t tmp[512];
+	int t = MultiByteToWideChar(CP_UTF8,0,chTemp,-1,tmp,512);
+	
+	tvi.pszText = tmp;
+	tvi.cchTextMax = (int)t;
 	tvi.mask = TVIF_TEXT | TVIF_SELECTEDIMAGE | TVIF_IMAGE | TVIF_HANDLE | TVIF_PARAM ;
 	tvi.iImage = m_aiImageList[AI_VIEW_IMGLIST_MATERIAL];
 	tvi.iSelectedImage = m_aiImageList[AI_VIEW_IMGLIST_MATERIAL];
@@ -527,7 +536,7 @@ int CDisplay::AddMaterialToDisplayList(HTREEITEM hRoot,
 
 	// add the item to the list
 	HTREEITEM hTexture = (HTREEITEM)SendMessage(GetDlgItem(g_hDlg,IDC_TREE1), 
-		TVM_INSERTITEM, 
+		TVM_INSERTITEMW, 
 		0,
 		(LPARAM)(LPTVINSERTSTRUCT)&sNew);
 

+ 76 - 0
workspaces/vc8/assimp.vcproj

@@ -3355,6 +3355,82 @@
 						>
 					</File>
 				</Filter>
+				<Filter
+					Name="ConvertUTF"
+					>
+					<File
+						RelativePath="..\..\contrib\ConvertUTF\ConvertUTF.c"
+						>
+						<FileConfiguration
+							Name="debug|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release-dll|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="debug-dll|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release-noboost-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="debug-noboost-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="debug-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+					</File>
+					<File
+						RelativePath="..\..\contrib\ConvertUTF\ConvertUTF.h"
+						>
+					</File>
+				</Filter>
 			</Filter>
 			<Filter
 				Name="core"

+ 76 - 0
workspaces/vc9/assimp.vcproj

@@ -3407,6 +3407,82 @@
 						>
 					</File>
 				</Filter>
+				<Filter
+					Name="ConvertUTF"
+					>
+					<File
+						RelativePath="..\..\contrib\ConvertUTF\ConvertUTF.c"
+						>
+						<FileConfiguration
+							Name="debug|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release-dll|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="debug-dll|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release-noboost-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="debug-noboost-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="debug-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+						<FileConfiguration
+							Name="release-st|Win32"
+							>
+							<Tool
+								Name="VCCLCompilerTool"
+								UsePrecompiledHeader="0"
+							/>
+						</FileConfiguration>
+					</File>
+					<File
+						RelativePath="..\..\contrib\ConvertUTF\ConvertUTF.h"
+						>
+					</File>
+				</Filter>
 			</Filter>
 			<Filter
 				Name="util"