// -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
//
// internal System.Xml.XmlUtil
//
// Author:
// Daniel Weber (daniel-weber@austin.rr.com)
// Code ported from Open XML 2.3.17 (Delphi/Kylix)
//
// (C) 2001 Daniel Weber
//
using System;
using System.IO;
namespace System.Xml
{
/// <summary>
/// Helper class with static utility functions that are not specific to any XML version,
/// such as character-encoding conversions.
/// </summary>
internal class XmlUtil
{
/// <summary>
/// Converts one ISO-8859-1 byte to a UTF-16 character.
/// The byte value is returned unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The character whose code equals <paramref name="P"/>.</returns>
public static char Iso8859_1ToUTF16Char(byte P)
{
	char sameCode = System.Convert.ToChar(P);
	return sameCode;
}
/// <summary>
/// Converts one ISO-8859-2 (Latin-2) byte to a UTF-16 character.
/// Bytes listed in the switch are remapped; every other byte is returned
/// unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_2ToUTF16Char(byte P)
{
	switch (P)
	{
		case 0xa1: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
		case 0xa2: return (char) 0x02d8; // BREVE
		case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE
		// Was 0x0132 (LATIN CAPITAL LIGATURE IJ); U+013D is L WITH CARON,
		// the upper-case partner of 0xb5 -> U+013E below.
		case 0xa5: return (char) 0x013d; // LATIN CAPITAL LETTER L WITH CARON
		case 0xa6: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE
		case 0xa9: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
		case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
		case 0xab: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON
		case 0xac: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
		case 0xae: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
		case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
		case 0xb1: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
		case 0xb2: return (char) 0x02db; // OGONEK
		case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE
		case 0xb5: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON
		case 0xb6: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE
		case 0xb7: return (char) 0x02c7; // CARON
		case 0xb9: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
		case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
		case 0xbb: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON
		case 0xbc: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE
		case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT
		case 0xbe: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
		case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE
		case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE
		case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE
		case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE
		case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
		case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
		case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
		case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON
		case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON
		case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
		case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
		case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON
		case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
		case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON
		case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE
		case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
		case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA
		case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE
		case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE
		case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE
		case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE
		case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
		case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
		case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON
		case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON
		case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
		case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE
		case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON
		case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
		case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON
		case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE
		case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH DOUBLE ACUTE
		case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA
		case 0xff: return (char) 0x02d9; // DOT ABOVE
		default:
			// All remaining bytes keep their numeric code.
			return (char) P;
	}
}
/// <summary>
/// Converts one ISO-8859-3 byte to a UTF-16 character.
/// Remapped bytes are listed in the switch; the seven bytes grouped at the top
/// are rejected; every other byte is returned unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="P"/> is 0xa5, 0xae, 0xbe, 0xc3, 0xd0, 0xe3 or 0xf0.
/// </exception>
public static char Iso8859_3ToUTF16Char( byte P)
{
	switch (P)
	{
		// Bytes this table rejects.
		case 0xa5:
		case 0xae:
		case 0xbe:
		case 0xc3:
		case 0xd0:
		case 0xe3:
		case 0xf0:
			throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");

		case 0xa1: return '\u0126'; // LATIN CAPITAL LETTER H WITH STROKE
		case 0xa2: return '\u02d8'; // BREVE
		case 0xa6: return '\u0124'; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
		case 0xa9: return '\u0130'; // LATIN CAPITAL LETTER I WITH DOT ABOVE
		case 0xaa: return '\u015e'; // LATIN CAPITAL LETTER S WITH CEDILLA
		case 0xab: return '\u011e'; // LATIN CAPITAL LETTER G WITH BREVE
		case 0xac: return '\u0134'; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
		case 0xaf: return '\u017b'; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
		case 0xb1: return '\u0127'; // LATIN SMALL LETTER H WITH STROKE
		case 0xb6: return '\u0125'; // LATIN SMALL LETTER H WITH CIRCUMFLEX
		case 0xb9: return '\u0131'; // LATIN SMALL LETTER DOTLESS I
		case 0xba: return '\u015f'; // LATIN SMALL LETTER S WITH CEDILLA
		case 0xbb: return '\u011f'; // LATIN SMALL LETTER G WITH BREVE
		case 0xbc: return '\u0135'; // LATIN SMALL LETTER J WITH CIRCUMFLEX
		case 0xbf: return '\u017c'; // LATIN SMALL LETTER Z WITH DOT ABOVE
		case 0xc5: return '\u010a'; // LATIN CAPITAL LETTER C WITH DOT ABOVE
		case 0xc6: return '\u0108'; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
		case 0xd5: return '\u0120'; // LATIN CAPITAL LETTER G WITH DOT ABOVE
		case 0xd8: return '\u011c'; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
		case 0xdd: return '\u016c'; // LATIN CAPITAL LETTER U WITH BREVE
		case 0xde: return '\u015c'; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
		case 0xe5: return '\u010b'; // LATIN SMALL LETTER C WITH DOT ABOVE
		case 0xe6: return '\u0109'; // LATIN SMALL LETTER C WITH CIRCUMFLEX
		case 0xf5: return '\u0121'; // LATIN SMALL LETTER G WITH DOT ABOVE
		case 0xf8: return '\u011d'; // LATIN SMALL LETTER G WITH CIRCUMFLEX
		case 0xfd: return '\u016d'; // LATIN SMALL LETTER U WITH BREVE
		case 0xfe: return '\u015d'; // LATIN SMALL LETTER S WITH CIRCUMFLEX
		case 0xff: return '\u02d9'; // DOT ABOVE
	}

	// Not remapped and not rejected: the byte value is the character code.
	return (char) P;
}
/// <summary>
/// Converts one ISO-8859-4 byte to a UTF-16 character.
/// Remapped bytes are listed in the switch; every other byte is returned
/// unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_4ToUTF16Char( byte P)
{
	switch (P)
	{
		case 0xa1: return '\u0104'; // LATIN CAPITAL LETTER A WITH OGONEK
		case 0xa2: return '\u0138'; // LATIN SMALL LETTER KRA
		case 0xa3: return '\u0156'; // LATIN CAPITAL LETTER R WITH CEDILLA
		case 0xa5: return '\u0128'; // LATIN CAPITAL LETTER I WITH TILDE
		case 0xa6: return '\u013b'; // LATIN CAPITAL LETTER L WITH CEDILLA
		case 0xa9: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON
		case 0xaa: return '\u0112'; // LATIN CAPITAL LETTER E WITH MACRON
		case 0xab: return '\u0122'; // LATIN CAPITAL LETTER G WITH CEDILLA
		case 0xac: return '\u0166'; // LATIN CAPITAL LETTER T WITH STROKE
		case 0xae: return '\u017d'; // LATIN CAPITAL LETTER Z WITH CARON
		case 0xb1: return '\u0105'; // LATIN SMALL LETTER A WITH OGONEK
		case 0xb2: return '\u02db'; // OGONEK
		case 0xb3: return '\u0157'; // LATIN SMALL LETTER R WITH CEDILLA
		case 0xb5: return '\u0129'; // LATIN SMALL LETTER I WITH TILDE
		case 0xb6: return '\u013c'; // LATIN SMALL LETTER L WITH CEDILLA
		case 0xb7: return '\u02c7'; // CARON
		case 0xb9: return '\u0161'; // LATIN SMALL LETTER S WITH CARON
		case 0xba: return '\u0113'; // LATIN SMALL LETTER E WITH MACRON
		case 0xbb: return '\u0123'; // LATIN SMALL LETTER G WITH CEDILLA
		case 0xbc: return '\u0167'; // LATIN SMALL LETTER T WITH STROKE
		case 0xbd: return '\u014a'; // LATIN CAPITAL LETTER ENG
		case 0xbe: return '\u017e'; // LATIN SMALL LETTER Z WITH CARON
		case 0xbf: return '\u014b'; // LATIN SMALL LETTER ENG
		case 0xc0: return '\u0100'; // LATIN CAPITAL LETTER A WITH MACRON
		case 0xc7: return '\u012e'; // LATIN CAPITAL LETTER I WITH OGONEK
		case 0xc8: return '\u010c'; // LATIN CAPITAL LETTER C WITH CARON
		case 0xca: return '\u0118'; // LATIN CAPITAL LETTER E WITH OGONEK
		case 0xcc: return '\u0116'; // LATIN CAPITAL LETTER E WITH DOT ABOVE
		case 0xcf: return '\u012a'; // LATIN CAPITAL LETTER I WITH MACRON
		case 0xd0: return '\u0110'; // LATIN CAPITAL LETTER D WITH STROKE
		case 0xd1: return '\u0145'; // LATIN CAPITAL LETTER N WITH CEDILLA
		case 0xd2: return '\u014c'; // LATIN CAPITAL LETTER O WITH MACRON
		case 0xd3: return '\u0136'; // LATIN CAPITAL LETTER K WITH CEDILLA
		case 0xd9: return '\u0172'; // LATIN CAPITAL LETTER U WITH OGONEK
		case 0xdd: return '\u0168'; // LATIN CAPITAL LETTER U WITH TILDE
		case 0xde: return '\u016a'; // LATIN CAPITAL LETTER U WITH MACRON
		case 0xe0: return '\u0101'; // LATIN SMALL LETTER A WITH MACRON
		case 0xe7: return '\u012f'; // LATIN SMALL LETTER I WITH OGONEK
		case 0xe8: return '\u010d'; // LATIN SMALL LETTER C WITH CARON
		case 0xea: return '\u0119'; // LATIN SMALL LETTER E WITH OGONEK
		case 0xec: return '\u0117'; // LATIN SMALL LETTER E WITH DOT ABOVE
		case 0xef: return '\u012b'; // LATIN SMALL LETTER I WITH MACRON
		case 0xf0: return '\u0111'; // LATIN SMALL LETTER D WITH STROKE
		case 0xf1: return '\u0146'; // LATIN SMALL LETTER N WITH CEDILLA
		case 0xf2: return '\u014d'; // LATIN SMALL LETTER O WITH MACRON
		case 0xf3: return '\u0137'; // LATIN SMALL LETTER K WITH CEDILLA
		case 0xf9: return '\u0173'; // LATIN SMALL LETTER U WITH OGONEK
		case 0xfd: return '\u0169'; // LATIN SMALL LETTER U WITH TILDE
		case 0xfe: return '\u016b'; // LATIN SMALL LETTER U WITH MACRON
		case 0xff: return '\u02d9'; // DOT ABOVE
	}

	// Not remapped: the byte value is the character code.
	return (char) P;
}
/// <summary>
/// Converts one ISO-8859-5 byte to a UTF-16 character.
/// Bytes up to 0xa0 and the byte 0xad are returned unchanged, 0xf0 and 0xfd
/// have dedicated mappings, and every other byte is shifted by 0x0360.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_5ToUTF16Char(byte P)
{
	// Identity part of the table.
	if ( P <= 0xa0 || P == 0xad )
		return (char) P;

	switch (P)
	{
		case 0xf0: return '\u2116'; // NUMERO SIGN
		case 0xfd: return '\u00a7'; // SECTION SIGN
	}

	// Everything else sits at a constant distance from its code point.
	return System.Convert.ToChar( 0x0360 + P );
}
/// <summary>
/// Converts one ISO-8859-6 (Latin/Arabic) byte to a UTF-16 character.
/// </summary>
/// <remarks>
/// Bytes up to 0xa0, 0xa4 (CURRENCY SIGN) and 0xad (SOFT HYPHEN) are returned
/// unchanged. The Arabic bytes 0xac, 0xbb, 0xbf, 0xc1-0xda and 0xe0-0xf2 are
/// shifted by 0x0560, so that 0xac becomes U+060C (ARABIC COMMA) and 0xc1
/// becomes U+0621 (ARABIC LETTER HAMZA). The previous offset of 0x0580 placed
/// every Arabic character 0x20 code points too high, and 0xad was rejected.
/// </remarks>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="P"/> is not assigned in ISO-8859-6.
/// </exception>
public static char Iso8859_6ToUTF16Char(byte P)
{
	// Identity part of the table.
	if ( (P <= 0xa0) || (P == 0xa4) || (P == 0xad) )
		return (char) P;

	// Arabic punctuation (0xac, 0xbb, 0xbf) and the two letter ranges.
	bool isArabic = (P == 0xac) || (P == 0xbb) || (P == 0xbf)
		|| ((P >= 0xc1) && (P <= 0xda))
		|| ((P >= 0xe0) && (P <= 0xf2));
	if ( isArabic )
		return System.Convert.ToChar(P + 0x0560);

	throw new InvalidOperationException("Invalid ISO-8859-6 sequence [" + P.ToString() + "]");
}
/// <summary>
/// Converts one ISO-8859-7 (Latin/Greek) byte to a UTF-16 character.
/// </summary>
/// <remarks>
/// Bytes up to 0xa0 and the Latin-1 punctuation bytes listed in the switch are
/// returned unchanged; the quotation marks, horizontal bar, euro and drachma
/// signs have dedicated mappings; the remaining assigned bytes are shifted by
/// 0x02d0 into the Greek block (0xb4 becomes U+0384, 0xc1 becomes U+0391).
/// Previously 0xa3 (POUND SIGN), 0xa4, 0xa5 and 0xae fell into the shifted
/// branch and produced U+0373, U+0374, U+0375 and U+037E. They now follow the
/// Unicode consortium's ISO-8859-7 table, where 0xae is unassigned.
/// </remarks>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="P"/> is 0xae, 0xd2 or 0xff.
/// </exception>
public static char Iso8859_7ToUTF16Char(byte P)
{
	// ASCII, C1 controls and NO-BREAK SPACE.
	if ( P <= 0xa0 )
		return (char) P;

	switch (P)
	{
		case 0xa1: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK
		case 0xa2: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
		case 0xa4: return (char) 0x20ac; // EURO SIGN
		case 0xa5: return (char) 0x20af; // DRACHMA SIGN
		case 0xaf: return (char) 0x2015; // HORIZONTAL BAR

		// Punctuation shared with Latin-1: the byte value is the character code.
		case 0xa3:
		case 0xa6: case 0xa7: case 0xa8: case 0xa9:
		case 0xab: case 0xac: case 0xad:
		case 0xb0: case 0xb1: case 0xb2: case 0xb3:
		case 0xb7: case 0xbb: case 0xbd:
			return (char) P;

		// Unassigned positions.
		case 0xae:
		case 0xd2:
		case 0xff:
			throw new InvalidOperationException("Invalid ISO-8859-7 sequence [" + P.ToString() + "]");

		default:
			// Greek letters and tonos marks, plus 0xaa -> U+037A.
			return System.Convert.ToChar(P + 0x02d0);
	}
}
/// <summary>
/// Converts one ISO-8859-8 (Latin/Hebrew) byte to a UTF-16 character.
/// </summary>
/// <remarks>
/// Bytes up to 0xa0 and the punctuation ranges 0xa2-0xa9, 0xab-0xae, 0xb0-0xb9
/// and 0xbb-0xbe are returned unchanged. 0xaa, 0xaf, 0xba and 0xdf have
/// dedicated mappings. The Hebrew letters 0xe0-0xfa are shifted by 0x04f0, so
/// that 0xe0 becomes U+05D0 (HEBREW LETTER ALEF) and 0xfa becomes U+05EA
/// (HEBREW LETTER TAV). The previous offset of 0x04e0 placed every letter 0x10
/// code points too low.
/// </remarks>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="P"/> is not covered by any of the ranges above.
/// </exception>
public static char Iso8859_8ToUTF16Char(byte P)
{
	// Identity part of the table.
	if ( P <= 0xa0 )
		return (char) P;
	if ( ((P >= 0xa2) && (P <= 0xa9))
		|| ((P >= 0xab) && (P <= 0xae))
		|| ((P >= 0xb0) && (P <= 0xb9))
		|| ((P >= 0xbb) && (P <= 0xbe)) )
		return (char) P;

	switch (P)
	{
		case 0xaa: return (char) 0x00d7; // MULTIPLICATION SIGN
		case 0xaf: return (char) 0x203e; // OVERLINE
		case 0xba: return (char) 0x00f7; // DIVISION SIGN
		case 0xdf: return (char) 0x2017; // DOUBLE LOW LINE
	}

	// Hebrew letters ALEF..TAV.
	if ( (P >= 0xe0) && (P <= 0xfa) )
		return System.Convert.ToChar(P + 0x04f0);

	throw new InvalidOperationException("Invalid ISO-8859-8 sequence [" + P.ToString() + "]");
}
/// <summary>
/// Converts one ISO-8859-9 byte to a UTF-16 character.
/// Six bytes are remapped; every other byte is returned unchanged as the
/// character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_9ToUTF16Char(byte P)
{
	switch (P)
	{
		case 0xd0: return '\u011e'; // LATIN CAPITAL LETTER G WITH BREVE
		case 0xdd: return '\u0130'; // LATIN CAPITAL LETTER I WITH DOT ABOVE
		case 0xde: return '\u015e'; // LATIN CAPITAL LETTER S WITH CEDILLA
		case 0xf0: return '\u011f'; // LATIN SMALL LETTER G WITH BREVE
		case 0xfd: return '\u0131'; // LATIN SMALL LETTER DOTLESS I
		case 0xfe: return '\u015f'; // LATIN SMALL LETTER S WITH CEDILLA
	}

	// Not remapped: the byte value is the character code.
	return (char) P;
}
/// <summary>
/// Converts one ISO-8859-10 byte to a UTF-16 character.
/// Remapped bytes are listed in the switch; every other byte is returned
/// unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_10ToUTF16Char(byte P)
{
	switch (P)
	{
		case 0xa1: return '\u0104'; // LATIN CAPITAL LETTER A WITH OGONEK
		case 0xa2: return '\u0112'; // LATIN CAPITAL LETTER E WITH MACRON
		case 0xa3: return '\u0122'; // LATIN CAPITAL LETTER G WITH CEDILLA
		case 0xa4: return '\u012a'; // LATIN CAPITAL LETTER I WITH MACRON
		case 0xa5: return '\u0128'; // LATIN CAPITAL LETTER I WITH TILDE
		case 0xa6: return '\u0136'; // LATIN CAPITAL LETTER K WITH CEDILLA
		case 0xa8: return '\u013b'; // LATIN CAPITAL LETTER L WITH CEDILLA
		case 0xa9: return '\u0110'; // LATIN CAPITAL LETTER D WITH STROKE
		case 0xaa: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON
		case 0xab: return '\u0166'; // LATIN CAPITAL LETTER T WITH STROKE
		case 0xac: return '\u017d'; // LATIN CAPITAL LETTER Z WITH CARON
		case 0xae: return '\u016a'; // LATIN CAPITAL LETTER U WITH MACRON
		case 0xaf: return '\u014a'; // LATIN CAPITAL LETTER ENG
		case 0xb1: return '\u0105'; // LATIN SMALL LETTER A WITH OGONEK
		case 0xb2: return '\u0113'; // LATIN SMALL LETTER E WITH MACRON
		case 0xb3: return '\u0123'; // LATIN SMALL LETTER G WITH CEDILLA
		case 0xb4: return '\u012b'; // LATIN SMALL LETTER I WITH MACRON
		case 0xb5: return '\u0129'; // LATIN SMALL LETTER I WITH TILDE
		case 0xb6: return '\u0137'; // LATIN SMALL LETTER K WITH CEDILLA
		case 0xb8: return '\u013c'; // LATIN SMALL LETTER L WITH CEDILLA
		case 0xb9: return '\u0111'; // LATIN SMALL LETTER D WITH STROKE
		case 0xba: return '\u0161'; // LATIN SMALL LETTER S WITH CARON
		case 0xbb: return '\u0167'; // LATIN SMALL LETTER T WITH STROKE
		case 0xbc: return '\u017e'; // LATIN SMALL LETTER Z WITH CARON
		case 0xbd: return '\u2015'; // HORIZONTAL BAR
		case 0xbe: return '\u016b'; // LATIN SMALL LETTER U WITH MACRON
		case 0xbf: return '\u014b'; // LATIN SMALL LETTER ENG
		case 0xc0: return '\u0100'; // LATIN CAPITAL LETTER A WITH MACRON
		case 0xc7: return '\u012e'; // LATIN CAPITAL LETTER I WITH OGONEK
		case 0xc8: return '\u010c'; // LATIN CAPITAL LETTER C WITH CARON
		case 0xca: return '\u0118'; // LATIN CAPITAL LETTER E WITH OGONEK
		case 0xcc: return '\u0116'; // LATIN CAPITAL LETTER E WITH DOT ABOVE
		case 0xd1: return '\u0145'; // LATIN CAPITAL LETTER N WITH CEDILLA
		case 0xd2: return '\u014c'; // LATIN CAPITAL LETTER O WITH MACRON
		case 0xd7: return '\u0168'; // LATIN CAPITAL LETTER U WITH TILDE
		case 0xd9: return '\u0172'; // LATIN CAPITAL LETTER U WITH OGONEK
		case 0xe0: return '\u0101'; // LATIN SMALL LETTER A WITH MACRON
		case 0xe7: return '\u012f'; // LATIN SMALL LETTER I WITH OGONEK
		case 0xe8: return '\u010d'; // LATIN SMALL LETTER C WITH CARON
		case 0xea: return '\u0119'; // LATIN SMALL LETTER E WITH OGONEK
		case 0xec: return '\u0117'; // LATIN SMALL LETTER E WITH DOT ABOVE
		case 0xf1: return '\u0146'; // LATIN SMALL LETTER N WITH CEDILLA
		case 0xf2: return '\u014d'; // LATIN SMALL LETTER O WITH MACRON
		case 0xf7: return '\u0169'; // LATIN SMALL LETTER U WITH TILDE
		case 0xf9: return '\u0173'; // LATIN SMALL LETTER U WITH OGONEK
		case 0xff: return '\u0138'; // LATIN SMALL LETTER KRA
	}

	// Not remapped: the byte value is the character code.
	return (char) P;
}
/// <summary>
/// Converts one ISO-8859-13 byte to a UTF-16 character.
/// Remapped bytes are listed in the switch; every other byte is returned
/// unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_13ToUTF16Char(byte P)
{
	switch (P)
	{
		case 0xa1: return '\u201d'; // RIGHT DOUBLE QUOTATION MARK
		case 0xa5: return '\u201e'; // DOUBLE LOW-9 QUOTATION MARK
		case 0xa8: return '\u00d8'; // LATIN CAPITAL LETTER O WITH STROKE
		case 0xaa: return '\u0156'; // LATIN CAPITAL LETTER R WITH CEDILLA
		case 0xaf: return '\u00c6'; // LATIN CAPITAL LETTER AE
		case 0xb4: return '\u201c'; // LEFT DOUBLE QUOTATION MARK
		case 0xb8: return '\u00f8'; // LATIN SMALL LETTER O WITH STROKE
		case 0xba: return '\u0157'; // LATIN SMALL LETTER R WITH CEDILLA
		case 0xbf: return '\u00e6'; // LATIN SMALL LETTER AE
		case 0xc0: return '\u0104'; // LATIN CAPITAL LETTER A WITH OGONEK
		case 0xc1: return '\u012e'; // LATIN CAPITAL LETTER I WITH OGONEK
		case 0xc2: return '\u0100'; // LATIN CAPITAL LETTER A WITH MACRON
		case 0xc3: return '\u0106'; // LATIN CAPITAL LETTER C WITH ACUTE
		case 0xc6: return '\u0118'; // LATIN CAPITAL LETTER E WITH OGONEK
		case 0xc7: return '\u0112'; // LATIN CAPITAL LETTER E WITH MACRON
		case 0xc8: return '\u010c'; // LATIN CAPITAL LETTER C WITH CARON
		case 0xca: return '\u0179'; // LATIN CAPITAL LETTER Z WITH ACUTE
		case 0xcb: return '\u0116'; // LATIN CAPITAL LETTER E WITH DOT ABOVE
		case 0xcc: return '\u0122'; // LATIN CAPITAL LETTER G WITH CEDILLA
		case 0xcd: return '\u0136'; // LATIN CAPITAL LETTER K WITH CEDILLA
		case 0xce: return '\u012a'; // LATIN CAPITAL LETTER I WITH MACRON
		case 0xcf: return '\u013b'; // LATIN CAPITAL LETTER L WITH CEDILLA
		case 0xd0: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON
		case 0xd1: return '\u0143'; // LATIN CAPITAL LETTER N WITH ACUTE
		case 0xd2: return '\u0145'; // LATIN CAPITAL LETTER N WITH CEDILLA
		case 0xd4: return '\u014c'; // LATIN CAPITAL LETTER O WITH MACRON
		case 0xd8: return '\u0172'; // LATIN CAPITAL LETTER U WITH OGONEK
		case 0xd9: return '\u0141'; // LATIN CAPITAL LETTER L WITH STROKE
		case 0xda: return '\u015a'; // LATIN CAPITAL LETTER S WITH ACUTE
		case 0xdb: return '\u016a'; // LATIN CAPITAL LETTER U WITH MACRON
		case 0xdd: return '\u017b'; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
		case 0xde: return '\u017d'; // LATIN CAPITAL LETTER Z WITH CARON
		case 0xe0: return '\u0105'; // LATIN SMALL LETTER A WITH OGONEK
		case 0xe1: return '\u012f'; // LATIN SMALL LETTER I WITH OGONEK
		case 0xe2: return '\u0101'; // LATIN SMALL LETTER A WITH MACRON
		case 0xe3: return '\u0107'; // LATIN SMALL LETTER C WITH ACUTE
		case 0xe6: return '\u0119'; // LATIN SMALL LETTER E WITH OGONEK
		case 0xe7: return '\u0113'; // LATIN SMALL LETTER E WITH MACRON
		case 0xe8: return '\u010d'; // LATIN SMALL LETTER C WITH CARON
		case 0xea: return '\u017a'; // LATIN SMALL LETTER Z WITH ACUTE
		case 0xeb: return '\u0117'; // LATIN SMALL LETTER E WITH DOT ABOVE
		case 0xec: return '\u0123'; // LATIN SMALL LETTER G WITH CEDILLA
		case 0xed: return '\u0137'; // LATIN SMALL LETTER K WITH CEDILLA
		case 0xee: return '\u012b'; // LATIN SMALL LETTER I WITH MACRON
		case 0xef: return '\u013c'; // LATIN SMALL LETTER L WITH CEDILLA
		case 0xf0: return '\u0161'; // LATIN SMALL LETTER S WITH CARON
		case 0xf1: return '\u0144'; // LATIN SMALL LETTER N WITH ACUTE
		case 0xf2: return '\u0146'; // LATIN SMALL LETTER N WITH CEDILLA
		case 0xf4: return '\u014d'; // LATIN SMALL LETTER O WITH MACRON
		case 0xf8: return '\u0173'; // LATIN SMALL LETTER U WITH OGONEK
		case 0xf9: return '\u0142'; // LATIN SMALL LETTER L WITH STROKE
		case 0xfa: return '\u015b'; // LATIN SMALL LETTER S WITH ACUTE
		case 0xfb: return '\u016b'; // LATIN SMALL LETTER U WITH MACRON
		case 0xfd: return '\u017c'; // LATIN SMALL LETTER Z WITH DOT ABOVE
		case 0xfe: return '\u017e'; // LATIN SMALL LETTER Z WITH CARON
		case 0xff: return '\u2019'; // RIGHT SINGLE QUOTATION MARK
	}

	// Not remapped: the byte value is the character code.
	return (char) P;
}
/// <summary>
/// Converts one ISO-8859-14 byte to a UTF-16 character.
/// Remapped bytes are listed in the switch; every other byte is returned
/// unchanged as the character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_14ToUTF16Char(byte P)
{
	switch (P)
	{
		case 0xa1: return '\u1e02'; // LATIN CAPITAL LETTER B WITH DOT ABOVE
		case 0xa2: return '\u1e03'; // LATIN SMALL LETTER B WITH DOT ABOVE
		case 0xa4: return '\u010a'; // LATIN CAPITAL LETTER C WITH DOT ABOVE
		case 0xa5: return '\u010b'; // LATIN SMALL LETTER C WITH DOT ABOVE
		case 0xa6: return '\u1e0a'; // LATIN CAPITAL LETTER D WITH DOT ABOVE
		case 0xa8: return '\u1e80'; // LATIN CAPITAL LETTER W WITH GRAVE
		case 0xaa: return '\u1e82'; // LATIN CAPITAL LETTER W WITH ACUTE
		case 0xab: return '\u1e0b'; // LATIN SMALL LETTER D WITH DOT ABOVE
		case 0xac: return '\u1ef2'; // LATIN CAPITAL LETTER Y WITH GRAVE
		case 0xaf: return '\u0178'; // LATIN CAPITAL LETTER Y WITH DIAERESIS
		case 0xb0: return '\u1e1e'; // LATIN CAPITAL LETTER F WITH DOT ABOVE
		case 0xb1: return '\u1e1f'; // LATIN SMALL LETTER F WITH DOT ABOVE
		case 0xb2: return '\u0120'; // LATIN CAPITAL LETTER G WITH DOT ABOVE
		case 0xb3: return '\u0121'; // LATIN SMALL LETTER G WITH DOT ABOVE
		case 0xb4: return '\u1e40'; // LATIN CAPITAL LETTER M WITH DOT ABOVE
		case 0xb5: return '\u1e41'; // LATIN SMALL LETTER M WITH DOT ABOVE
		case 0xb7: return '\u1e56'; // LATIN CAPITAL LETTER P WITH DOT ABOVE
		case 0xb8: return '\u1e81'; // LATIN SMALL LETTER W WITH GRAVE
		case 0xb9: return '\u1e57'; // LATIN SMALL LETTER P WITH DOT ABOVE
		case 0xba: return '\u1e83'; // LATIN SMALL LETTER W WITH ACUTE
		case 0xbb: return '\u1e60'; // LATIN CAPITAL LETTER S WITH DOT ABOVE
		case 0xbc: return '\u1ef3'; // LATIN SMALL LETTER Y WITH GRAVE
		case 0xbd: return '\u1e84'; // LATIN CAPITAL LETTER W WITH DIAERESIS
		case 0xbe: return '\u1e85'; // LATIN SMALL LETTER W WITH DIAERESIS
		case 0xbf: return '\u1e61'; // LATIN SMALL LETTER S WITH DOT ABOVE
		case 0xd0: return '\u0174'; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
		case 0xd7: return '\u1e6a'; // LATIN CAPITAL LETTER T WITH DOT ABOVE
		case 0xde: return '\u0176'; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
		case 0xf0: return '\u0175'; // LATIN SMALL LETTER W WITH CIRCUMFLEX
		case 0xf7: return '\u1e6b'; // LATIN SMALL LETTER T WITH DOT ABOVE
		case 0xfe: return '\u0177'; // LATIN SMALL LETTER Y WITH CIRCUMFLEX
	}

	// Not remapped: the byte value is the character code.
	return (char) P;
}
/// <summary>
/// Converts one ISO-8859-15 (Latin-9) byte to a UTF-16 character.
/// Eight bytes are remapped; every other byte is returned unchanged as the
/// character code.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char Iso8859_15ToUTF16Char(byte P)
{
	switch (P)
	{
		case 0xa4: return (char) 0x20ac; // EURO SIGN
		// Was 0x00a6 (BROKEN BAR, i.e. no remapping at all); U+0160 is the
		// upper-case partner of 0xa8 -> U+0161 below.
		case 0xa6: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
		case 0xa8: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
		case 0xb4: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
		case 0xb8: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
		case 0xbc: return (char) 0x0152; // LATIN CAPITAL LIGATURE OE
		case 0xbd: return (char) 0x0153; // LATIN SMALL LIGATURE OE
		case 0xbe: return (char) 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
		default:
			// All remaining bytes keep their numeric code.
			return (char) P;
	}
}
/// <summary>
/// Converts one KOI8-R byte to a UTF-16 character.
/// Bytes below 0x80 are returned unchanged as the character code; each byte
/// from 0x80 to 0xff has its own entry in the switch.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>The corresponding UTF-16 character.</returns>
public static char KOI8_RToUTF16Char(byte P)
{
	// Lower half: the byte value is the character code.
	if (P < 0x80)
		return (char) P;

	switch (P)
	{
		// Box drawing, block elements and mathematical symbols.
		case 0x80: return '\u2500'; // BOX DRAWINGS LIGHT HORIZONTAL
		case 0x81: return '\u2502'; // BOX DRAWINGS LIGHT VERTICAL
		case 0x82: return '\u250c'; // BOX DRAWINGS LIGHT DOWN AND RIGHT
		case 0x83: return '\u2510'; // BOX DRAWINGS LIGHT DOWN AND LEFT
		case 0x84: return '\u2514'; // BOX DRAWINGS LIGHT UP AND RIGHT
		case 0x85: return '\u2518'; // BOX DRAWINGS LIGHT UP AND LEFT
		case 0x86: return '\u251c'; // BOX DRAWINGS LIGHT VERTICAL AND RIGHT
		case 0x87: return '\u2524'; // BOX DRAWINGS LIGHT VERTICAL AND LEFT
		case 0x88: return '\u252c'; // BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
		case 0x89: return '\u2534'; // BOX DRAWINGS LIGHT UP AND HORIZONTAL
		case 0x8a: return '\u253c'; // BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
		case 0x8b: return '\u2580'; // UPPER HALF BLOCK
		case 0x8c: return '\u2584'; // LOWER HALF BLOCK
		case 0x8d: return '\u2588'; // FULL BLOCK
		case 0x8e: return '\u258c'; // LEFT HALF BLOCK
		case 0x8f: return '\u2590'; // RIGHT HALF BLOCK
		case 0x90: return '\u2591'; // LIGHT SHADE
		case 0x91: return '\u2592'; // MEDIUM SHADE
		case 0x92: return '\u2593'; // DARK SHADE
		case 0x93: return '\u2320'; // TOP HALF INTEGRAL
		case 0x94: return '\u25a0'; // BLACK SQUARE
		case 0x95: return '\u2219'; // BULLET OPERATOR
		case 0x96: return '\u221a'; // SQUARE ROOT
		case 0x97: return '\u2248'; // ALMOST EQUAL TO
		case 0x98: return '\u2264'; // LESS-THAN OR EQUAL TO
		case 0x99: return '\u2265'; // GREATER-THAN OR EQUAL TO
		case 0x9a: return '\u00a0'; // NO-BREAK SPACE
		case 0x9b: return '\u2321'; // BOTTOM HALF INTEGRAL
		case 0x9c: return '\u00b0'; // DEGREE SIGN
		case 0x9d: return '\u00b2'; // SUPERSCRIPT TWO
		case 0x9e: return '\u00b7'; // MIDDLE DOT
		case 0x9f: return '\u00f7'; // DIVISION SIGN

		// Double-line box drawing, with the two IO letters in between.
		case 0xa0: return '\u2550'; // BOX DRAWINGS DOUBLE HORIZONTAL
		case 0xa1: return '\u2551'; // BOX DRAWINGS DOUBLE VERTICAL
		case 0xa2: return '\u2552'; // BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
		case 0xa3: return '\u0451'; // CYRILLIC SMALL LETTER IO
		case 0xa4: return '\u2553'; // BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
		case 0xa5: return '\u2554'; // BOX DRAWINGS DOUBLE DOWN AND RIGHT
		case 0xa6: return '\u2555'; // BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
		case 0xa7: return '\u2556'; // BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
		case 0xa8: return '\u2557'; // BOX DRAWINGS DOUBLE DOWN AND LEFT
		case 0xa9: return '\u2558'; // BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
		case 0xaa: return '\u2559'; // BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
		case 0xab: return '\u255a'; // BOX DRAWINGS DOUBLE UP AND RIGHT
		case 0xac: return '\u255b'; // BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
		case 0xad: return '\u255c'; // BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
		case 0xae: return '\u255d'; // BOX DRAWINGS DOUBLE UP AND LEFT
		case 0xaf: return '\u255e'; // BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
		case 0xb0: return '\u255f'; // BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
		case 0xb1: return '\u2560'; // BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
		case 0xb2: return '\u2561'; // BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
		case 0xb3: return '\u0401'; // CYRILLIC CAPITAL LETTER IO
		case 0xb4: return '\u2562'; // BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
		case 0xb5: return '\u2563'; // BOX DRAWINGS DOUBLE VERTICAL AND LEFT
		case 0xb6: return '\u2564'; // BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
		case 0xb7: return '\u2565'; // BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
		case 0xb8: return '\u2566'; // BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
		case 0xb9: return '\u2567'; // BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
		case 0xba: return '\u2568'; // BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
		case 0xbb: return '\u2569'; // BOX DRAWINGS DOUBLE UP AND HORIZONTAL
		case 0xbc: return '\u256a'; // BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
		case 0xbd: return '\u256b'; // BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
		case 0xbe: return '\u256c'; // BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
		case 0xbf: return '\u00a9'; // COPYRIGHT SIGN

		// Cyrillic small letters.
		case 0xc0: return '\u044e'; // CYRILLIC SMALL LETTER YU
		case 0xc1: return '\u0430'; // CYRILLIC SMALL LETTER A
		case 0xc2: return '\u0431'; // CYRILLIC SMALL LETTER BE
		case 0xc3: return '\u0446'; // CYRILLIC SMALL LETTER TSE
		case 0xc4: return '\u0434'; // CYRILLIC SMALL LETTER DE
		case 0xc5: return '\u0435'; // CYRILLIC SMALL LETTER IE
		case 0xc6: return '\u0444'; // CYRILLIC SMALL LETTER EF
		case 0xc7: return '\u0433'; // CYRILLIC SMALL LETTER GHE
		case 0xc8: return '\u0445'; // CYRILLIC SMALL LETTER HA
		case 0xc9: return '\u0438'; // CYRILLIC SMALL LETTER I
		case 0xca: return '\u0439'; // CYRILLIC SMALL LETTER SHORT I
		case 0xcb: return '\u043a'; // CYRILLIC SMALL LETTER KA
		case 0xcc: return '\u043b'; // CYRILLIC SMALL LETTER EL
		case 0xcd: return '\u043c'; // CYRILLIC SMALL LETTER EM
		case 0xce: return '\u043d'; // CYRILLIC SMALL LETTER EN
		case 0xcf: return '\u043e'; // CYRILLIC SMALL LETTER O
		case 0xd0: return '\u043f'; // CYRILLIC SMALL LETTER PE
		case 0xd1: return '\u044f'; // CYRILLIC SMALL LETTER YA
		case 0xd2: return '\u0440'; // CYRILLIC SMALL LETTER ER
		case 0xd3: return '\u0441'; // CYRILLIC SMALL LETTER ES
		case 0xd4: return '\u0442'; // CYRILLIC SMALL LETTER TE
		case 0xd5: return '\u0443'; // CYRILLIC SMALL LETTER U
		case 0xd6: return '\u0436'; // CYRILLIC SMALL LETTER ZHE
		case 0xd7: return '\u0432'; // CYRILLIC SMALL LETTER VE
		case 0xd8: return '\u044c'; // CYRILLIC SMALL LETTER SOFT SIGN
		case 0xd9: return '\u044b'; // CYRILLIC SMALL LETTER YERU
		case 0xda: return '\u0437'; // CYRILLIC SMALL LETTER ZE
		case 0xdb: return '\u0448'; // CYRILLIC SMALL LETTER SHA
		case 0xdc: return '\u044d'; // CYRILLIC SMALL LETTER E
		case 0xdd: return '\u0449'; // CYRILLIC SMALL LETTER SHCHA
		case 0xde: return '\u0447'; // CYRILLIC SMALL LETTER CHE
		case 0xdf: return '\u044a'; // CYRILLIC SMALL LETTER HARD SIGN

		// Cyrillic capital letters.
		case 0xe0: return '\u042e'; // CYRILLIC CAPITAL LETTER YU
		case 0xe1: return '\u0410'; // CYRILLIC CAPITAL LETTER A
		case 0xe2: return '\u0411'; // CYRILLIC CAPITAL LETTER BE
		case 0xe3: return '\u0426'; // CYRILLIC CAPITAL LETTER TSE
		case 0xe4: return '\u0414'; // CYRILLIC CAPITAL LETTER DE
		case 0xe5: return '\u0415'; // CYRILLIC CAPITAL LETTER IE
		case 0xe6: return '\u0424'; // CYRILLIC CAPITAL LETTER EF
		case 0xe7: return '\u0413'; // CYRILLIC CAPITAL LETTER GHE
		case 0xe8: return '\u0425'; // CYRILLIC CAPITAL LETTER HA
		case 0xe9: return '\u0418'; // CYRILLIC CAPITAL LETTER I
		case 0xea: return '\u0419'; // CYRILLIC CAPITAL LETTER SHORT I
		case 0xeb: return '\u041a'; // CYRILLIC CAPITAL LETTER KA
		case 0xec: return '\u041b'; // CYRILLIC CAPITAL LETTER EL
		case 0xed: return '\u041c'; // CYRILLIC CAPITAL LETTER EM
		case 0xee: return '\u041d'; // CYRILLIC CAPITAL LETTER EN
		case 0xef: return '\u041e'; // CYRILLIC CAPITAL LETTER O
		case 0xf0: return '\u041f'; // CYRILLIC CAPITAL LETTER PE
		case 0xf1: return '\u042f'; // CYRILLIC CAPITAL LETTER YA
		case 0xf2: return '\u0420'; // CYRILLIC CAPITAL LETTER ER
		case 0xf3: return '\u0421'; // CYRILLIC CAPITAL LETTER ES
		case 0xf4: return '\u0422'; // CYRILLIC CAPITAL LETTER TE
		case 0xf5: return '\u0423'; // CYRILLIC CAPITAL LETTER U
		case 0xf6: return '\u0416'; // CYRILLIC CAPITAL LETTER ZHE
		case 0xf7: return '\u0412'; // CYRILLIC CAPITAL LETTER VE
		case 0xf8: return '\u042c'; // CYRILLIC CAPITAL LETTER SOFT SIGN
		case 0xf9: return '\u042b'; // CYRILLIC CAPITAL LETTER YERU
		case 0xfa: return '\u0417'; // CYRILLIC CAPITAL LETTER ZE
		case 0xfb: return '\u0428'; // CYRILLIC CAPITAL LETTER SHA
		case 0xfc: return '\u042d'; // CYRILLIC CAPITAL LETTER E
		case 0xfd: return '\u0429'; // CYRILLIC CAPITAL LETTER SHCHA
		case 0xfe: return '\u0427'; // CYRILLIC CAPITAL LETTER CHE
		case 0xff: return '\u042a'; // CYRILLIC CAPITAL LETTER HARD SIGN
	}

	// Required by the compiler only: every byte from 0x80 up has a case above.
	return (char) P;
}
/// <summary>
/// Converts one byte of code page 10000 (Mac Roman) text to its UTF-16 character.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>
/// The mapped character. Bytes that have no entry in the table are returned
/// with the same numeric value.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown for byte 0xf0, which this table treats as an invalid sequence.
/// </exception>
public static char cp10000_MacRomanToUTF16Char(byte P)
{
    // Nothing below 0x80 appears in the table; those bytes keep their value.
    if (P < 0x80)
        return (char) P;

    switch (P)
    {
        // 0x80 - 0x9f: accented Latin letters
        case 0x80: return '\u00C4'; // LATIN CAPITAL LETTER A WITH DIAERESIS
        case 0x81: return '\u00C5'; // LATIN CAPITAL LETTER A WITH RING ABOVE
        case 0x82: return '\u00C7'; // LATIN CAPITAL LETTER C WITH CEDILLA
        case 0x83: return '\u00C9'; // LATIN CAPITAL LETTER E WITH ACUTE
        case 0x84: return '\u00D1'; // LATIN CAPITAL LETTER N WITH TILDE
        case 0x85: return '\u00D6'; // LATIN CAPITAL LETTER O WITH DIAERESIS
        case 0x86: return '\u00DC'; // LATIN CAPITAL LETTER U WITH DIAERESIS
        case 0x87: return '\u00E1'; // LATIN SMALL LETTER A WITH ACUTE
        case 0x88: return '\u00E0'; // LATIN SMALL LETTER A WITH GRAVE
        case 0x89: return '\u00E2'; // LATIN SMALL LETTER A WITH CIRCUMFLEX
        case 0x8a: return '\u00E4'; // LATIN SMALL LETTER A WITH DIAERESIS
        case 0x8b: return '\u00E3'; // LATIN SMALL LETTER A WITH TILDE
        case 0x8c: return '\u00E5'; // LATIN SMALL LETTER A WITH RING ABOVE
        case 0x8d: return '\u00E7'; // LATIN SMALL LETTER C WITH CEDILLA
        case 0x8e: return '\u00E9'; // LATIN SMALL LETTER E WITH ACUTE
        case 0x8f: return '\u00E8'; // LATIN SMALL LETTER E WITH GRAVE
        case 0x90: return '\u00EA'; // LATIN SMALL LETTER E WITH CIRCUMFLEX
        case 0x91: return '\u00EB'; // LATIN SMALL LETTER E WITH DIAERESIS
        case 0x92: return '\u00ED'; // LATIN SMALL LETTER I WITH ACUTE
        case 0x93: return '\u00EC'; // LATIN SMALL LETTER I WITH GRAVE
        case 0x94: return '\u00EE'; // LATIN SMALL LETTER I WITH CIRCUMFLEX
        case 0x95: return '\u00EF'; // LATIN SMALL LETTER I WITH DIAERESIS
        case 0x96: return '\u00F1'; // LATIN SMALL LETTER N WITH TILDE
        case 0x97: return '\u00F3'; // LATIN SMALL LETTER O WITH ACUTE
        case 0x98: return '\u00F2'; // LATIN SMALL LETTER O WITH GRAVE
        case 0x99: return '\u00F4'; // LATIN SMALL LETTER O WITH CIRCUMFLEX
        case 0x9a: return '\u00F6'; // LATIN SMALL LETTER O WITH DIAERESIS
        case 0x9b: return '\u00F5'; // LATIN SMALL LETTER O WITH TILDE
        case 0x9c: return '\u00FA'; // LATIN SMALL LETTER U WITH ACUTE
        case 0x9d: return '\u00F9'; // LATIN SMALL LETTER U WITH GRAVE
        case 0x9e: return '\u00FB'; // LATIN SMALL LETTER U WITH CIRCUMFLEX
        case 0x9f: return '\u00FC'; // LATIN SMALL LETTER U WITH DIAERESIS

        // 0xa0 - 0xbf: symbols (0xa2, 0xa3, 0xa9, 0xb1 and 0xb5 have no entry)
        case 0xa0: return '\u2020'; // DAGGER
        case 0xa1: return '\u00B0'; // DEGREE SIGN
        case 0xa4: return '\u00A7'; // SECTION SIGN
        case 0xa5: return '\u2022'; // BULLET
        case 0xa6: return '\u00B6'; // PILCROW SIGN
        case 0xa7: return '\u00DF'; // LATIN SMALL LETTER SHARP S
        case 0xa8: return '\u00AE'; // REGISTERED SIGN
        case 0xaa: return '\u2122'; // TRADE MARK SIGN
        case 0xab: return '\u00B4'; // ACUTE ACCENT
        case 0xac: return '\u00A8'; // DIAERESIS
        case 0xad: return '\u2260'; // NOT EQUAL TO
        case 0xae: return '\u00C6'; // LATIN CAPITAL LIGATURE AE
        case 0xaf: return '\u00D8'; // LATIN CAPITAL LETTER O WITH STROKE
        case 0xb0: return '\u221E'; // INFINITY
        case 0xb2: return '\u2264'; // LESS-THAN OR EQUAL TO
        case 0xb3: return '\u2265'; // GREATER-THAN OR EQUAL TO
        case 0xb4: return '\u00A5'; // YEN SIGN
        case 0xb6: return '\u2202'; // PARTIAL DIFFERENTIAL
        case 0xb7: return '\u2211'; // N-ARY SUMMATION
        case 0xb8: return '\u220F'; // N-ARY PRODUCT
        case 0xb9: return '\u03C0'; // GREEK SMALL LETTER PI
        case 0xba: return '\u222B'; // INTEGRAL
        case 0xbb: return '\u00AA'; // FEMININE ORDINAL INDICATOR
        case 0xbc: return '\u00BA'; // MASCULINE ORDINAL INDICATOR
        case 0xbd: return '\u2126'; // OHM SIGN
        case 0xbe: return '\u00E6'; // LATIN SMALL LIGATURE AE
        case 0xbf: return '\u00F8'; // LATIN SMALL LETTER O WITH STROKE

        // 0xc0 - 0xdf: punctuation, math signs and ligatures
        case 0xc0: return '\u00BF'; // INVERTED QUESTION MARK
        case 0xc1: return '\u00A1'; // INVERTED EXCLAMATION MARK
        case 0xc2: return '\u00AC'; // NOT SIGN
        case 0xc3: return '\u221A'; // SQUARE ROOT
        case 0xc4: return '\u0192'; // LATIN SMALL LETTER F WITH HOOK
        case 0xc5: return '\u2248'; // ALMOST EQUAL TO
        case 0xc6: return '\u2206'; // INCREMENT
        case 0xc7: return '\u00AB'; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
        case 0xc8: return '\u00BB'; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
        case 0xc9: return '\u2026'; // HORIZONTAL ELLIPSIS
        case 0xca: return '\u00A0'; // NO-BREAK SPACE
        case 0xcb: return '\u00C0'; // LATIN CAPITAL LETTER A WITH GRAVE
        case 0xcc: return '\u00C3'; // LATIN CAPITAL LETTER A WITH TILDE
        case 0xcd: return '\u00D5'; // LATIN CAPITAL LETTER O WITH TILDE
        case 0xce: return '\u0152'; // LATIN CAPITAL LIGATURE OE
        case 0xcf: return '\u0153'; // LATIN SMALL LIGATURE OE
        case 0xd0: return '\u2013'; // EN DASH
        case 0xd1: return '\u2014'; // EM DASH
        case 0xd2: return '\u201C'; // LEFT DOUBLE QUOTATION MARK
        case 0xd3: return '\u201D'; // RIGHT DOUBLE QUOTATION MARK
        case 0xd4: return '\u2018'; // LEFT SINGLE QUOTATION MARK
        case 0xd5: return '\u2019'; // RIGHT SINGLE QUOTATION MARK
        case 0xd6: return '\u00F7'; // DIVISION SIGN
        case 0xd7: return '\u25CA'; // LOZENGE
        case 0xd8: return '\u00FF'; // LATIN SMALL LETTER Y WITH DIAERESIS
        case 0xd9: return '\u0178'; // LATIN CAPITAL LETTER Y WITH DIAERESIS
        case 0xda: return '\u2044'; // FRACTION SLASH
        case 0xdb: return '\u00A4'; // CURRENCY SIGN
        case 0xdc: return '\u2039'; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        case 0xdd: return '\u203A'; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        case 0xde: return '\uFB01'; // LATIN SMALL LIGATURE FI
        case 0xdf: return '\uFB02'; // LATIN SMALL LIGATURE FL

        // 0xe0 - 0xff: more punctuation, capitals and spacing accents
        case 0xe0: return '\u2021'; // DOUBLE DAGGER
        case 0xe1: return '\u00B7'; // MIDDLE DOT
        case 0xe2: return '\u201A'; // SINGLE LOW-9 QUOTATION MARK
        case 0xe3: return '\u201E'; // DOUBLE LOW-9 QUOTATION MARK
        case 0xe4: return '\u2030'; // PER MILLE SIGN
        case 0xe5: return '\u00C2'; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
        case 0xe6: return '\u00CA'; // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
        case 0xe7: return '\u00C1'; // LATIN CAPITAL LETTER A WITH ACUTE
        case 0xe8: return '\u00CB'; // LATIN CAPITAL LETTER E WITH DIAERESIS
        case 0xe9: return '\u00C8'; // LATIN CAPITAL LETTER E WITH GRAVE
        case 0xea: return '\u00CD'; // LATIN CAPITAL LETTER I WITH ACUTE
        case 0xeb: return '\u00CE'; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
        case 0xec: return '\u00CF'; // LATIN CAPITAL LETTER I WITH DIAERESIS
        case 0xed: return '\u00CC'; // LATIN CAPITAL LETTER I WITH GRAVE
        case 0xee: return '\u00D3'; // LATIN CAPITAL LETTER O WITH ACUTE
        case 0xef: return '\u00D4'; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
        case 0xf0:
            // the table has no character for this byte
            throw new InvalidOperationException("Invalid cp10000_MacRoman sequence [" + P.ToString() + "]");
        case 0xf1: return '\u00D2'; // LATIN CAPITAL LETTER O WITH GRAVE
        case 0xf2: return '\u00DA'; // LATIN CAPITAL LETTER U WITH ACUTE
        case 0xf3: return '\u00DB'; // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
        case 0xf4: return '\u00D9'; // LATIN CAPITAL LETTER U WITH GRAVE
        case 0xf5: return '\u0131'; // LATIN SMALL LETTER DOTLESS I
        case 0xf6: return '\u02C6'; // MODIFIER LETTER CIRCUMFLEX ACCENT
        case 0xf7: return '\u02DC'; // SMALL TILDE
        case 0xf8: return '\u00AF'; // MACRON
        case 0xf9: return '\u02D8'; // BREVE
        case 0xfa: return '\u02D9'; // DOT ABOVE
        case 0xfb: return '\u02DA'; // RING ABOVE
        case 0xfc: return '\u00B8'; // CEDILLA
        case 0xfd: return '\u02DD'; // DOUBLE ACUTE ACCENT
        case 0xfe: return '\u02DB'; // OGONEK
        case 0xff: return '\u02C7'; // CARON
    }

    // Bytes without an entry (0xa2, 0xa3, 0xa9, 0xb1, 0xb5) keep their numeric value.
    return (char) P;
}
/// <summary>
/// Converts one byte of Windows-1250 (Central European) text to its UTF-16 character.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>
/// The mapped character. Bytes below 0x80 have no table entry and are returned
/// with the same numeric value.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown for the bytes this table treats as unassigned: 0x81, 0x83, 0x88, 0x90 and 0x98.
/// </exception>
public static char cp1250ToUTF16Char(byte P)
{
    // This function was provided by Miloslav Skácel (ported by DrW)
    switch (P)
    {
        // Bytes the table treats as unassigned.
        case 0x81:
        case 0x83:
        case 0x88:
        case 0x90:
        case 0x98:
            throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]");

        // 0x80 is the EURO SIGN in Windows-1250; it used to be rejected as invalid.
        case 0x80: return (char) 0x20ac; // EURO SIGN
        case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK
        case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK
        case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS
        case 0x86: return (char) 0x2020; // DAGGER
        case 0x87: return (char) 0x2021; // DOUBLE DAGGER
        case 0x89: return (char) 0x2030; // PER MILLE SIGN
        case 0x8a: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
        case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        case 0x8c: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE
        case 0x8d: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON
        case 0x8e: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
        case 0x8f: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
        case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK
        case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
        case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK
        case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK
        case 0x95: return (char) 0x2022; // BULLET
        case 0x96: return (char) 0x2013; // EN DASH
        case 0x97: return (char) 0x2014; // EM DASH
        case 0x99: return (char) 0x2122; // TRADE MARK SIGN
        case 0x9a: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
        case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        case 0x9c: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE
        case 0x9d: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON
        case 0x9e: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
        case 0x9f: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE
        case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE
        case 0xa1: return (char) 0x02c7; // CARON
        case 0xa2: return (char) 0x02d8; // BREVE
        case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE
        case 0xa4: return (char) 0x00a4; // CURRENCY SIGN
        case 0xa5: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
        case 0xa6: return (char) 0x00a6; // BROKEN BAR
        case 0xa7: return (char) 0x00a7; // SECTION SIGN
        case 0xa8: return (char) 0x00a8; // DIAERESIS
        case 0xa9: return (char) 0x00a9; // COPYRIGHT SIGN
        case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
        case 0xab: return (char) 0x00ab; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
        case 0xac: return (char) 0x00ac; // NOT SIGN
        case 0xad: return (char) 0x00ad; // SOFT HYPHEN
        case 0xae: return (char) 0x00ae; // REGISTERED SIGN
        case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
        case 0xb0: return (char) 0x00b0; // DEGREE SIGN
        case 0xb1: return (char) 0x00b1; // PLUS-MINUS SIGN
        case 0xb2: return (char) 0x02db; // OGONEK
        case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE
        case 0xb4: return (char) 0x00b4; // ACUTE ACCENT
        case 0xb5: return (char) 0x00b5; // MICRO SIGN
        case 0xb6: return (char) 0x00b6; // PILCROW SIGN
        case 0xb7: return (char) 0x00b7; // MIDDLE DOT
        case 0xb8: return (char) 0x00b8; // CEDILLA
        case 0xb9: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
        case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
        case 0xbb: return (char) 0x00bb; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
        case 0xbc: return (char) 0x013d; // LATIN CAPITAL LETTER L WITH CARON
        case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT
        case 0xbe: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON
        case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE
        case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE
        case 0xc1: return (char) 0x00c1; // LATIN CAPITAL LETTER A WITH ACUTE
        case 0xc2: return (char) 0x00c2; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
        case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE
        case 0xc4: return (char) 0x00c4; // LATIN CAPITAL LETTER A WITH DIAERESIS
        case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE
        case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
        case 0xc7: return (char) 0x00c7; // LATIN CAPITAL LETTER C WITH CEDILLA
        case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
        case 0xc9: return (char) 0x00c9; // LATIN CAPITAL LETTER E WITH ACUTE
        case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
        case 0xcb: return (char) 0x00cb; // LATIN CAPITAL LETTER E WITH DIAERESIS
        case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON
        case 0xcd: return (char) 0x00cd; // LATIN CAPITAL LETTER I WITH ACUTE
        case 0xce: return (char) 0x00ce; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
        case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON
        case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
        case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
        case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON
        case 0xd3: return (char) 0x00d3; // LATIN CAPITAL LETTER O WITH ACUTE
        case 0xd4: return (char) 0x00d4; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
        case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
        case 0xd6: return (char) 0x00d6; // LATIN CAPITAL LETTER O WITH DIAERESIS
        case 0xd7: return (char) 0x00d7; // MULTIPLICATION SIGN
        case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON
        case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE
        case 0xda: return (char) 0x00da; // LATIN CAPITAL LETTER U WITH ACUTE
        case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
        case 0xdc: return (char) 0x00dc; // LATIN CAPITAL LETTER U WITH DIAERESIS
        case 0xdd: return (char) 0x00dd; // LATIN CAPITAL LETTER Y WITH ACUTE
        case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA
        case 0xdf: return (char) 0x00df; // LATIN SMALL LETTER SHARP S
        case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE
        case 0xe1: return (char) 0x00e1; // LATIN SMALL LETTER A WITH ACUTE
        case 0xe2: return (char) 0x00e2; // LATIN SMALL LETTER A WITH CIRCUMFLEX
        case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE
        case 0xe4: return (char) 0x00e4; // LATIN SMALL LETTER A WITH DIAERESIS
        case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE
        case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE
        case 0xe7: return (char) 0x00e7; // LATIN SMALL LETTER C WITH CEDILLA
        case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
        case 0xe9: return (char) 0x00e9; // LATIN SMALL LETTER E WITH ACUTE
        case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
        case 0xeb: return (char) 0x00eb; // LATIN SMALL LETTER E WITH DIAERESIS
        case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON
        case 0xed: return (char) 0x00ed; // LATIN SMALL LETTER I WITH ACUTE
        case 0xee: return (char) 0x00ee; // LATIN SMALL LETTER I WITH CIRCUMFLEX
        case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON
        case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
        case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE
        case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON
        case 0xf3: return (char) 0x00f3; // LATIN SMALL LETTER O WITH ACUTE
        case 0xf4: return (char) 0x00f4; // LATIN SMALL LETTER O WITH CIRCUMFLEX
        case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
        case 0xf6: return (char) 0x00f6; // LATIN SMALL LETTER O WITH DIAERESIS
        case 0xf7: return (char) 0x00f7; // DIVISION SIGN
        case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON
        case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE
        case 0xfa: return (char) 0x00fa; // LATIN SMALL LETTER U WITH ACUTE
        case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH DOUBLE ACUTE
        case 0xfc: return (char) 0x00fc; // LATIN SMALL LETTER U WITH DIAERESIS
        case 0xfd: return (char) 0x00fd; // LATIN SMALL LETTER Y WITH ACUTE
        case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA
        case 0xff: return (char) 0x02d9; // DOT ABOVE
        default:
            // everything below 0x80 keeps its value
            return (char) P;
    }
}
/// <summary>
/// Converts one byte of Windows-1251 (Cyrillic) text to its UTF-16 character.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>
/// The mapped character. Bytes 0xc0-0xff are shifted by 0x0350 (giving U+0410-U+044F);
/// bytes with no table entry below 0xc0 are returned with the same numeric value.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown for byte 0x98, which this table treats as an invalid sequence.
/// </exception>
public static char cp1251ToUTF16Char(byte P)
{
    switch (P)
    {
        case 0x80: return (char) 0x0402; // CYRILLIC CAPITAL LETTER DJE
        case 0x81: return (char) 0x0403; // CYRILLIC CAPITAL LETTER GJE
        case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK
        case 0x83: return (char) 0x0453; // CYRILLIC SMALL LETTER GJE
        case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK
        case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS
        case 0x86: return (char) 0x2020; // DAGGER
        case 0x87: return (char) 0x2021; // DOUBLE DAGGER
        case 0x88: return (char) 0x20ac; // EURO SIGN
        case 0x89: return (char) 0x2030; // PER MILLE SIGN
        case 0x8a: return (char) 0x0409; // CYRILLIC CAPITAL LETTER LJE
        case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        case 0x8c: return (char) 0x040a; // CYRILLIC CAPITAL LETTER NJE
        case 0x8d: return (char) 0x040c; // CYRILLIC CAPITAL LETTER KJE
        case 0x8e: return (char) 0x040b; // CYRILLIC CAPITAL LETTER TSHE
        case 0x8f: return (char) 0x040f; // CYRILLIC CAPITAL LETTER DZHE
        case 0x90: return (char) 0x0452; // CYRILLIC SMALL LETTER DJE
        case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK
        case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
        case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK
        case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK
        case 0x95: return (char) 0x2022; // BULLET
        case 0x96: return (char) 0x2013; // EN DASH
        case 0x97: return (char) 0x2014; // EM DASH
        case 0x98: throw new InvalidOperationException("Invalid cp1251 sequence [" + P.ToString() + "]");
        case 0x99: return (char) 0x2122; // TRADE MARK SIGN
        case 0x9a: return (char) 0x0459; // CYRILLIC SMALL LETTER LJE
        case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        case 0x9c: return (char) 0x045a; // CYRILLIC SMALL LETTER NJE
        case 0x9d: return (char) 0x045c; // CYRILLIC SMALL LETTER KJE
        case 0x9e: return (char) 0x045b; // CYRILLIC SMALL LETTER TSHE
        case 0x9f: return (char) 0x045f; // CYRILLIC SMALL LETTER DZHE
        case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE
        case 0xa1: return (char) 0x040e; // CYRILLIC CAPITAL LETTER SHORT U
        case 0xa2: return (char) 0x045e; // CYRILLIC SMALL LETTER SHORT U
        case 0xa3: return (char) 0x0408; // CYRILLIC CAPITAL LETTER JE
        case 0xa4: return (char) 0x00a4; // CURRENCY SIGN
        case 0xa5: return (char) 0x0490; // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
        case 0xa8: return (char) 0x0401; // CYRILLIC CAPITAL LETTER IO
        case 0xaa: return (char) 0x0404; // CYRILLIC CAPITAL LETTER UKRAINIAN IE
        case 0xaf: return (char) 0x0407; // CYRILLIC CAPITAL LETTER YI
        case 0xb2: return (char) 0x0406; // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
        case 0xb3: return (char) 0x0456; // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
        case 0xb4: return (char) 0x0491; // CYRILLIC SMALL LETTER GHE WITH UPTURN
        case 0xb8: return (char) 0x0451; // CYRILLIC SMALL LETTER IO
        case 0xb9: return (char) 0x2116; // NUMERO SIGN
        case 0xba: return (char) 0x0454; // CYRILLIC SMALL LETTER UKRAINIAN IE
        case 0xbc: return (char) 0x0458; // CYRILLIC SMALL LETTER JE
        case 0xbd: return (char) 0x0405; // CYRILLIC CAPITAL LETTER DZE
        case 0xbe: return (char) 0x0455; // CYRILLIC SMALL LETTER DZE
        case 0xbf: return (char) 0x0457; // CYRILLIC SMALL LETTER YI
    }

    // 0xc0-0xff is one contiguous run at a constant offset: 0xc0 + 0x0350 = U+0410
    // and 0xff + 0x0350 = U+044F. The upper bound is implied by the byte type.
    // The previous test, (P >= 0xc0) | (P <= 0xff), was always true, so ASCII and
    // the unlisted 0xa0-0xbf bytes were shifted as well.
    if (P >= 0xc0)
        return (char) (P + 0x0350);

    // ASCII and the remaining bytes with no table entry keep their value.
    return (char) P;
}
/// <summary>
/// Converts one byte of Windows-1252 (Western European) text to its UTF-16 character.
/// </summary>
/// <param name="P">The encoded byte.</param>
/// <returns>
/// The mapped character for bytes 0x80-0x9f; every other byte is returned
/// with the same numeric value.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown for the bytes this table treats as invalid: 0x81, 0x8d, 0x8f, 0x90 and 0x9d.
/// </exception>
public static char cp1252ToUTF16Char(byte P)
{
    // Provided by Olaf Lösken. (ported by DrW)
    // Info taken from
    // ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
    switch (P)
    {
        // bytes with no character assigned
        case 0x81:
        case 0x8D:
        case 0x8F:
        case 0x90:
        case 0x9D:
            throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]");

        case 0x80: return '\u20AC'; // EURO SIGN
        case 0x82: return '\u201A'; // SINGLE LOW-9 QUOTATION MARK
        case 0x83: return '\u0192'; // LATIN SMALL LETTER F WITH HOOK
        case 0x84: return '\u201E'; // DOUBLE LOW-9 QUOTATION MARK
        case 0x85: return '\u2026'; // HORIZONTAL ELLIPSIS
        case 0x86: return '\u2020'; // DAGGER
        case 0x87: return '\u2021'; // DOUBLE DAGGER
        case 0x88: return '\u02C6'; // MODIFIER LETTER CIRCUMFLEX ACCENT
        case 0x89: return '\u2030'; // PER MILLE SIGN
        case 0x8A: return '\u0160'; // LATIN CAPITAL LETTER S WITH CARON
        case 0x8B: return '\u2039'; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        case 0x8C: return '\u0152'; // LATIN CAPITAL LIGATURE OE
        case 0x8E: return '\u017D'; // LATIN CAPITAL LETTER Z WITH CARON
        case 0x91: return '\u2018'; // LEFT SINGLE QUOTATION MARK
        case 0x92: return '\u2019'; // RIGHT SINGLE QUOTATION MARK
        case 0x93: return '\u201C'; // LEFT DOUBLE QUOTATION MARK
        case 0x94: return '\u201D'; // RIGHT DOUBLE QUOTATION MARK
        case 0x95: return '\u2022'; // BULLET
        case 0x96: return '\u2013'; // EN DASH
        case 0x97: return '\u2014'; // EM DASH
        case 0x98: return '\u02DC'; // SMALL TILDE
        case 0x99: return '\u2122'; // TRADE MARK SIGN
        case 0x9A: return '\u0161'; // LATIN SMALL LETTER S WITH CARON
        case 0x9B: return '\u203A'; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        case 0x9C: return '\u0153'; // LATIN SMALL LIGATURE OE
        case 0x9E: return '\u017E'; // LATIN SMALL LETTER Z WITH CARON
        case 0x9F: return '\u0178'; // LATIN CAPITAL LETTER Y WITH DIAERESIS
    }

    // Outside 0x80-0x9f the byte value is returned unchanged.
    return (char) P;
}
/// <summary>
/// Reads one UTF-8 encoded character from a stream and returns its code point.
/// Sequences of one to six octets (the RFC 2279 form) are accepted.
/// </summary>
/// <param name="stream">Stream to read from; it is advanced past the octets consumed.</param>
/// <returns>
/// The decoded code point. A value above 0xFFFF does not fit in one UTF-16
/// char and has to be written as an upper/lower surrogate pair by the caller.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="stream"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown if 1) called at EOF, 2) the stream ends in the middle of a sequence,
/// or 3) an invalid UTF-8 encoding is found (bad lead or continuation octet,
/// more than six octets, or a value encoded with more octets than it needs).
/// </exception>
public static int ReadUTF8Char(Stream stream)
{
    if (stream == null)
        throw new ArgumentNullException("stream");

    byte[] buf = new byte[1];
    if (stream.Read(buf, 0, 1) != 1)
        throw new InvalidOperationException("Unexptected EOF reading stream");

    // 0xxx xxxx: 1-byte value, return it
    if (buf[0] < 0x80)
        return buf[0];

    byte first = buf[0];

    // first octet must be 110x xxxx to 1111 110x if high order bit set
    if ((first & 0xc0) != 0xc0)
        throw new InvalidOperationException("Invalid UTF-8 sequence at position " + Utf8ErrorPosition(stream));

    int numOctets = 1;
    int ucs4 = first;

    // Every 1-bit after the top bit of the first octet announces one more
    // octet, so walk the mask downwards until a 0-bit is reached.
    for (int mask = 0x40; (mask & first) != 0; mask >>= 1)
    {
        // Length/Position exist only on seekable streams; elsewhere the
        // Read call below is what detects the end of the stream.
        if (stream.CanSeek && stream.Length == stream.Position)
            throw new InvalidOperationException("Aborted UTF-8 (unexpected EOF) sequence at position " + Utf8ErrorPosition(stream));
        if (stream.Read(buf, 0, 1) != 1)
            throw new InvalidOperationException("Aborted UTF-8 sequence (missing characters) at position " + Utf8ErrorPosition(stream));

        // all octet sequence bytes start with 10nn nnnn, or they are invalid
        if ((buf[0] & 0xc0) != 0x80)
            throw new InvalidOperationException("Invalid UTF-8 sequence at position " + Utf8ErrorPosition(stream));

        // the low order 6 bits carry payload; append them to the result
        ucs4 = (ucs4 << 6) | (buf[0] & 0x3F);
        numOctets++;
    }

    // Max 6 octets in sequence (lead octets 0xfe / 0xff contain no 0-bit)
    if (numOctets > 6)
        throw new InvalidOperationException("Invalid UTF-8 sequence (no 0-bit in hdr) at position " + Utf8ErrorPosition(stream));

    // UTF-8 can encode up to the following values, per octet size
    int[] maxCode = {0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF};

    // mask off the header bits of the first octet (array is zero-based)
    ucs4 = ucs4 & maxCode[numOctets - 1];

    // check for invalid sequence as suggested by RFC2279: the value must not
    // fit in a shorter sequence (e.g. 0x7F encoded in two octets is improper).
    // numOctets is at least 2 here, so the index is always valid.
    if (ucs4 <= maxCode[numOctets - 2])
        throw new InvalidOperationException("Invalid UTF-8 sequence (invalid sequence) at position " + Utf8ErrorPosition(stream));

    return ucs4;
}

// Text for the "at position ..." part of an error message. A non-seekable
// stream cannot report a position, so a placeholder is used there instead of
// letting Stream.Position throw NotSupportedException and hide the real error.
private static string Utf8ErrorPosition(Stream stream)
{
    return stream.CanSeek ? stream.Position.ToString() : "(unknown)";
}
/// <summary>
/// Builds the low (trailing) UTF-16 surrogate for a code point.
/// </summary>
/// <param name="val">The code point to encode.</param>
/// <returns>0xDC00 combined with the low ten bits of <paramref name="val"/>.</returns>
public static char Utf16LowSurrogate(int val)
{
    // The low ten bits of 0xDC00 are all zero, so OR-ing the ten payload bits
    // in gives the same result as XOR or addition would.
    int payload = val & 0x03FF;
    return (char) (0xDC00 | payload);
}
/// <summary>
/// Builds the high (leading) UTF-16 surrogate for a code point.
/// </summary>
/// <param name="val">The code point to encode.</param>
/// <returns>0xD7C0 plus <paramref name="val"/> shifted right by ten bits.</returns>
public static char Utf16HighSurrogate(int val)
{
    // 0xD7C0 is 0xD800 - (0x10000 >> 10): the 0x10000 bias of supplementary
    // code points is folded into the base instead of being subtracted first.
    return (char) (0xD7C0 + (val >> 10));
}
}
}