| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554 |
- /******************************************************************************/
- #include "stdafx.h"
- namespace EE{
- /******************************************************************************/
- #define BOM_UTF_8 0xBBEF
- #define BOM_UTF_8_3 0xBF
- #define BOM_UTF_16 0xFEFF
- /******************************************************************************/
- static ENCODING LoadEncoding(File &f)
- {
- if(f.size()>=2) // encoding can be present only if there are at least 2 bytes
- {
- switch(f.getUShort())
- {
- case BOM_UTF_16: return UTF_16;
- case BOM_UTF_8 : if(f.getByte()==BOM_UTF_8_3)return UTF_8; break;
- }
- f.pos(0); // if there was no byte order mark found then reset position to the start
- }
- return ANSI; // default encoding
- }
- static void SaveEncoding(File &f, ENCODING encoding)
- {
- if(!f.size())switch(encoding) // we can save encoding only at the start of the file
- {
- case UTF_16: f.putUShort(BOM_UTF_16); break;
- case UTF_8 : f.putMulti (U16(BOM_UTF_8), Byte(BOM_UTF_8_3)); break;
- }
- }
- /******************************************************************************/
- void FileText::zero () {fix_new_line=true; indent=INDENT_TABS; depth=0; _code=ANSI;}
- FileText& FileText::del () {_f.del(); zero(); return T;}
- FileText::FileText() {zero();}
- FileText& FileText::writeMem(ENCODING encoding, Cipher *cipher)
- {
- del();
- _f.writeMem(65536, cipher);
- SaveEncoding(_f, T._code=encoding);
- return T;
- }
- Bool FileText::write(C Str &name, ENCODING encoding, Cipher *cipher)
- {
- del();
- if(_f.writeTry(name, cipher))
- {
- SaveEncoding(_f, T._code=encoding);
- return true;
- }
- return false;
- }
- Bool FileText::append(C Str &name, ENCODING encoding, Cipher *cipher)
- {
- del();
- if(_f.readStdTry(name, cipher))if(_f.size())encoding=LoadEncoding(_f); // take encoding from the file if it has some data
- if(_f. appendTry(name, cipher))
- {
- SaveEncoding(_f, T._code=encoding);
- return true;
- }
- return false;
- }
- Bool FileText::read(C Str &name, Cipher *cipher)
- {
- del();
- if(_f.readTry(name, cipher))
- {
- _code=LoadEncoding(_f);
- return true;
- }
- return false;
- }
- Bool FileText::read(C UID &id, Cipher *cipher)
- {
- del();
- if(_f.readTry(id, cipher))
- {
- _code=LoadEncoding(_f);
- return true;
- }
- return false;
- }
- Bool FileText::read(C Str &name, Pak &pak)
- {
- del();
- if(_f.readTry(name, pak))
- {
- _code=LoadEncoding(_f);
- return true;
- }
- return false;
- }
- Bool FileText::read(C UID &id, Pak &pak)
- {
- del();
- if(_f.readTry(id, pak))
- {
- _code=LoadEncoding(_f);
- return true;
- }
- return false;
- }
- FileText& FileText::readMem(CPtr data, Int size, Int encoding, Cipher *cipher)
- {
- del();
- _f.readMem(data, size, cipher);
- _code=LoadEncoding(_f); // load encoding always to skip potential BOM
- if(encoding>=0)_code=(ENCODING)encoding; // override encoding if it was specified
- return T;
- }
- /******************************************************************************/
- FileText& FileText::startLine()
- {
- if(indent)REP(depth)if(indent==INDENT_TABS)putChar('\t');else putText(" ");
- return T;
- }
- FileText& FileText::endLine()
- {
- switch(_code)
- {
- case ANSI :
- case UTF_8 :
- case UTF_8_NAKED: if(fix_new_line)_f.putUShort(0x0A0D);else _f.putByte(0x0A); break;
- case UTF_16: if(fix_new_line)_f.putUInt(0x000A000D);else _f.putUShort(0x000A); break;
- }
- return T;
- }
- FileText& FileText::putChar(Char8 c)
- {
- switch(c)
- {
- case '\n': endLine(); break;
- case '\r': if(fix_new_line)break; // if we're fixing new line, then never write '\r' manually, otherwise continue, that's why there's no break on purpose
- default : switch(_code)
- {
- case ANSI : _f.putByte ( c ); break;
- case UTF_16: _f.putUShort(Char8To16Fast(c)); break; // we can assume that Str was already initialized
- case UTF_8 :
- case UTF_8_NAKED:
- {
- U8 u=c;
- if(u<=0x7F)_f.putByte (u);
- else _f.putMulti(Byte(0xC0 | (u>>6)), Byte(0x80 | (u&0x3F)));
- }break;
- }break;
- }
- return T;
- }
- FileText& FileText::putChar(Char c)
- {
- switch(c)
- {
- case '\n': endLine(); break;
- case '\r': if(fix_new_line)break; // if we're fixing new line, then never write '\r' manually, otherwise continue, that's why there's no break on purpose
- default : switch(_code)
- {
- case ANSI : _f.putByte (Char16To8Fast(c)); break; // we can assume that Str was already initialized
- case UTF_16: _f.putUShort( c ); break;
- case UTF_8 :
- case UTF_8_NAKED:
- {
- U16 u=c;
- if(u<=0x07F)_f.putByte (u);else
- if(u<=0x7FF)_f.putMulti(Byte(0xC0 | (u>> 6)), Byte(0x80 | ( u &0x3F)));else
- _f.putMulti(Byte(0xE0 | (u>>12)), Byte(0x80 | ((u>>6)&0x3F)), Byte(0x80 | (u&0x3F)));
- }break;
- }break;
- }
- return T;
- }
- FileText& FileText::putText(C Str &text)
- {
- Char temp[65536/SIZE(Char)]; // use Char to force alignment
- switch(_code)
- {
- case ANSI: if(!fix_new_line){Str8 t=text; _f.putN(t(), t.length());}else
- {
- Str8 t=FixNewLine(text); _f.putN(t(), t.length());
- }break;
- case UTF_16: if(!fix_new_line){_f.putN(text(), text.length());}else
- {
- #if 0 // slower
- Str t=FixNewLine(text); _f.putN(t(), t.length());
- #else // faster
- const Int buf_elms=Elms(temp)-1; // use size -1 because we may be writing 2 characters in one step for "\r\n"
- for(Int i=0, buf_pos=0; ; )
- {
- Bool end=(i>=text.length());
- if( end || !InRange(buf_pos, buf_elms)) // if finished, or there's no more room in the buffer
- {
- if(buf_pos){_f.putN(temp, buf_pos); buf_pos=0;} // flush
- if(end)break;
- }
- U16 c=text()[i++]; // () avoids range checks
- if( c!='\r')
- if( c!='\n')temp[buf_pos++]=c;else
- {
- temp[buf_pos++]='\r';
- temp[buf_pos++]='\n';
- }
- }
- #endif
- }break;
- case UTF_8 :
- case UTF_8_NAKED:
- {
- #if 0 // slower
- FREPA(text)putChar(text[i]);
- #else // faster
- Char8 *C buf =(Char8*)temp;
- const Int buf_elms=SIZE(temp)/SIZE(Char8)-2; // use size -2 because we may be writing 3 characters in one step
- for(Int i=0, buf_pos=0; ; )
- {
- Bool end=(i>=text.length());
- if( end || !InRange(buf_pos, buf_elms)) // if finished, or there's no more room in the buffer
- {
- if(buf_pos){_f.putN(buf, buf_pos); buf_pos=0;} // flush
- if(end)break;
- }
- U16 c=text()[i++]; // () avoids range checks
- if(fix_new_line)switch(c)
- {
- case '\r': continue; // use 'continue' to don't store this character
- case '\n':
- {
- buf[buf_pos++]='\r';
- buf[buf_pos++]='\n';
- }continue; // use 'continue' because we've already stored this character
- }
- if(c<=0x07F) buf[buf_pos++]=c;else
- if(c<=0x7FF){buf[buf_pos++]=(0xC0 | (c>> 6)); buf[buf_pos++]=(0x80 | ( c &0x3F));}else
- {buf[buf_pos++]=(0xE0 | (c>>12)); buf[buf_pos++]=(0x80 | ((c>>6)&0x3F)); buf[buf_pos++]=(0x80 | (c&0x3F));}
- }
- #endif
- }break;
- }
- return T;
- }
- FileText& FileText::putLine(C Str &text)
- {
- return startLine().putText(text).endLine();
- }
- /******************************************************************************/
- Char FileText::getChar()
- {
- switch(_code)
- {
- default : return 0;
- case UTF_16: return _f.getUShort();
- case ANSI:
- {
- Char8 c[2]; c[0]=_f.getByte();
- #if WINDOWS // Code Pages are used on Windows
- if((c[0]&0x80) && !_f.end()) // this may be a 2-character multi-byte wide char
- {
- c[1]=_f.getByte();
- wchar_t w[2]; Int size=MultiByteToWideChar(CP_ACP, 0, c, Elms(c), w, Elms(w));
- if(size==1)return w[0]; // if 2 bytes generated 1 wide char, then return it
- _f.skip(-1); // otherwise, go back the extra byte that we've read, because we want to get only 1 char at this time
- }
- #endif
- return Char8To16Fast(c[0]); // we can assume that Str was already initialized
- }break;
- case UTF_8 :
- case UTF_8_NAKED:
- {
- Byte b0=_f.getByte();
- if(b0&(1<<7))
- {
- Byte b1=(_f.getByte()&0x3F);
- if((b0&(1<<6)) && (b0&(1<<5)))
- {
- Byte b2=(_f.getByte()&0x3F);
- if(b0&(1<<4))
- {
- Byte b3=(_f.getByte()&0x3F);
- b0&=0x07;
- UInt u=(b3|(b2<<6)|(b1<<12)|(b0<<18));
- return (u<=0xFFFF) ? u : '?';
- }else
- {
- b0&=0x0F;
- return b2|(b1<<6)|(b0<<12);
- }
- }else
- {
- b0&=0x1F;
- return b1|(b0<<6);
- }
- }else
- {
- return b0;
- }
- }break;
- }
- }
- FileText& FileText::skipLine()
- {
- for(; !end(); )
- {
- if(getChar()=='\n')break;
- }
- return T;
- }
- FileText& FileText::fullLine(Str &s)
- {
- s.clear();
- for(; !end(); )
- {
- Char c=getChar();
- if( c=='\n')break;
- if(U16(c)>=32 || c=='\t')s+=c;
- }
- return T;
- }
- FileText& FileText::getLine(Str &s)
- {
- s.clear();
- for(Bool start=true; !end(); )
- {
- Char c=getChar();
- if( c=='\n')break;
- if(U16(c)>=32 || c=='\t')
- {
- if(c!=' ' && c!='\t')start=false;
- if(!start)s+=c;
- }
- }
- return T;
- }
- FileText& FileText::getLine(Str8 &s)
- {
- s.clear();
- for(Bool start=true; !end(); )
- {
- Char c=getChar();
- if( c=='\n')break;
- if(U16(c)>=32 || c=='\t')
- {
- if(c!=' ' && c!='\t')start=false;
- if(!start)s+=c;
- }
- }
- return T;
- }
- FileText& FileText::getAll(Str &s)
- {
- s.clear();
- Int chars=((_code==UTF_16) ? _f.left()/SIZEI(Char) : _f.left()); if(chars>0)
- {
- s.reserve(chars); switch(_code)
- {
- case UTF_16:
- {
- s._length=_f.getReturnSize(s._d.data(), chars*SIZEI(Char))/SIZEI(Char); if(s.length()!=chars)_f._ok=false;
- #if WINDOWS
- utf_16:
- #endif
- Int length=s.length(); CChar *t=s();
- FREP(length)
- {
- Char c=t[i]; if(!(U16(c)>=32 || c=='\t' || c=='\n')) // found invalid char
- {
- Str temp; temp.reserve(length);
- CopyFastN(temp._d.data(), t, i); temp._length+=i; // copy everything up to this point
- #if 0 // simple iteration (slower)
- for(; ++i<length; ){Char c=t[i]; if(U16(c)>=32 || c=='\t' || c=='\n')temp._d[temp._length++]=c;}
- #else // batched copying
- Int last_ok=i+1; for(; ++i<length; )
- {
- Char c=t[i]; if(!(U16(c)>=32 || c=='\t' || c=='\n'))
- {
- Int copy=i-last_ok; CopyFastN(temp._d.data()+temp._length, t+last_ok, copy); temp._length+=copy;
- last_ok=i+1;
- }
- }
- Int copy=length-last_ok; CopyFastN(temp._d.data()+temp._length, t+last_ok, copy); temp._length+=copy;
- #endif
- Swap(temp, s);
- break;
- }
- }
- }break;
- case ANSI:
- {
- Str8 s8; s8.reserve(chars);
- Int length=s8._length=_f.getReturnSize(s8._d.data(), chars); if(length!=chars)_f._ok=false;
- Char *t=s._d.data(); CChar8 *t8=s8();
- FREP(length)
- {
- Char8 c=t8[i];
- if(U8(c)>=32 || c=='\t' || c=='\n')
- {
- #if WINDOWS // Code Pages are used on Windows
- if(c&0x80) // this may be a 2-character multi-byte wide char
- {
- s._length=MultiByteToWideChar(CP_ACP, 0, t8, length, WChar(s._d.data()), chars); // let OS handle multi-byte conversion
- goto utf_16; // we've converted to 16-bit so process as UTF-16
- }
- #endif
- *t++=Char8To16Fast(c); // we can assume that Str was already initialized
- }
- }
- s._length=t-s._d.data();
- }break;
- default:
- {
- Char *t=s._d.data();
- for(; !end(); )
- {
- Char c=getChar();
- if(U16(c)>=32 || c=='\t' || c=='\n')*t++=c;
- }
- s._length=t-s._d.data();
- }break;
- }
- s._d[s._length]='\0';
- }
- return T;
- }
- FileText& FileText::rewind()
- {
- depth=0;
- _f.pos(0);
- LoadEncoding(_f); // we already know the encoding, but we need to skip the byte order mark at start, don't set it to '_code' because 'UTF_8_NAKED' may had been used
- return T;
- }
- Bool FileText::copy(File &dest)
- {
- depth=0;
- if(!_f.pos(0))return false;
- return _f.copy(dest);
- }
- Char FileText::posInfo(Long pos, VecI2 &col_line)
- {
- rewind();
- Char last='\0'; VecI2 cl=0;
- if(pos>=T.pos())
- for(MIN(pos, size()); ; ) // this will allow reading right after the last character
- {
- last=getChar();
- if(T.pos()>pos || !ok())break;
- if(last=='\n'){cl.x=0; cl.y++;}else
- if(last!='\r') cl.x++;
- }
- col_line=cl; return last;
- }
- /******************************************************************************/
- }
- /******************************************************************************/
- void FileTextEx::get(Int &i) { i=getInt();}
- void FileTextEx::get(Flt &f) { f=getFlt();}
- void FileTextEx::get(Dbl &d) { d=getDbl();}
- void FileTextEx::get(Vec2 &v) {v.x=getFlt(); v.y=getFlt();}
- void FileTextEx::get(Vec &v) {v.x=getFlt(); v.y=getFlt(); v.z=getFlt();}
- void FileTextEx::get(Vec4 &v) {v.x=getFlt(); v.y=getFlt(); v.z=getFlt(); v.w=getFlt();}
- void FileTextEx::get(VecI2 &v) {v.x=getInt(); v.y=getInt();}
- void FileTextEx::get(VecI &v) {v.x=getInt(); v.y=getInt(); v.z=getInt();}
- void FileTextEx::get(VecI4 &v) {v.x=getInt(); v.y=getInt(); v.z=getInt(); v.w=getInt();}
- void FileTextEx::get(VecB4 &v) {v.x=getInt(); v.y=getInt(); v.z=getInt(); v.w=getInt();}
- Bool FileTextEx::getBool () {return TextBool(getWord());}
- Int FileTextEx::getInt () {return TextInt (getWord());}
- UInt FileTextEx::getUInt () {return TextUInt(getWord());}
- Flt FileTextEx::getFlt () {return TextFlt (getWord());}
- Dbl FileTextEx::getDbl () {return TextDbl (getWord());}
- Vec2 FileTextEx::getVec2 () {Vec2 v; get(v); return v;}
- Vec FileTextEx::getVec () {Vec v; get(v); return v;}
- Vec4 FileTextEx::getVec4 () {Vec4 v; get(v); return v;}
- VecI2 FileTextEx::getVecI2() {VecI2 v; get(v); return v;}
- VecI FileTextEx::getVecI () {VecI v; get(v); return v;}
- VecI4 FileTextEx::getVecI4() {VecI4 v; get(v); return v;}
- VecB4 FileTextEx::getVecB4() {VecB4 v; get(v); return v;}
- C Str& FileTextEx::getWord()
- {
- text.clear();
- for(Bool start=true; !end(); )
- {
- Char c=getChar();
- if( !c)break;
- if( c=='\t' || c=='\n' || c==' ')if(start)continue;else break;
- if(U16(c)>32)
- {
- start=false;
- text+=c;
- }
- }
- return text;
- }
- C Str& FileTextEx::getName()
- {
- text.clear();
- for(; !end(); )
- {
- Char c=getChar();
- if( !c || c=='\n')break;
- if( c=='"')
- {
- for(; !end(); )
- {
- Char c=getChar();
- if( !c || c=='\n' || c=='"')break;
- if(U16(c)>=32 || c=='\t')text+=c;
- }
- break;
- }
- }
- return text;
- }
- /******************************************************************************/
- Bool FileTextEx::getIn()
- {
- for(; !end(); )
- {
- getWord();
- if(text.first()=='{')return true;
- if(text.first()=='}')return false;
- }
- return false;
- }
- void FileTextEx::getOut()
- {
- for(Int depth=0; !end(); )
- {
- getWord();
- if(text.first()=='{')depth++;else
- if(text.first()=='}')if(--depth<0)break;
- }
- }
- Bool FileTextEx::level()
- {
- for(; !end(); )
- {
- getWord();
- if(text.first()=='}')break;
- if(text.first()=='{'){getOut(); continue;}
- return true;
- }
- return false;
- }
- /******************************************************************************/
|