Browse Source

+ initial revision

florian 25 years ago
parent
commit
66cbbfb910
2 changed files with 274 additions and 0 deletions
  1. 191 0
      rtl/inc/charset.pp
  2. 83 0
      utils/creumap.pp

+ 191 - 0
rtl/inc/charset.pp

@@ -0,0 +1,191 @@
+{
+    $Id$
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2000 by Florian Klaempfl
+    member of the Free Pascal development team.
+
+    This unit implements several classes for charset conversions
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+{$mode objfpc}
+unit charset;
+
+  interface
+
+    type
+       tunicodechar = word;
+       tunicodestring = ^tunicodechar;
+
+       tcsconvert = class
+         // !!!!!!1constructor create;
+       end;
+
+       tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
+         umf_unused);
+
+       punicodecharmapping = ^tunicodecharmapping;
+       tunicodecharmapping = record
+          unicode : tunicodechar;
+          flag : tunicodecharmappingflag;
+          reserved : byte;
+       end;
+
+       punicodemap = ^tunicodemap;
+       tunicodemap = record
+          cpname : shortstring;
+          map : punicodecharmapping;
+          lastchar : longint;
+          next : punicodemap;
+          internalmap : boolean;
+       end;
+
+       tcp2unicode = class(tcsconvert)
+       end;
+
+    function loadunicodemapping(const cpname,f : string) : punicodemap;
+    procedure registermapping(p : punicodemap);
+
+  implementation
+
+    var
+       mappings : punicodemap;
+
+    function loadunicodemapping(const cpname,f : string) : punicodemap;
+
+      var
+         data : punicodecharmapping;
+         datasize : longint;
+         t : text;
+         s,hs : string;
+         scanpos,charpos,unicodevalue : longint;
+         code : word;
+         flag : tunicodecharmappingflag;
+         p : punicodemap;
+         lastchar : longint;
+
+      begin
+         lastchar:=-1;
+         loadunicodemapping:=nil;
+         datasize:=256;
+         getmem(data,sizeof(tunicodecharmapping)*datasize);
+         assign(t,f);
+         {$I-}
+         reset(t);
+         {$I+}
+         if ioresult<>0 then
+           begin
+              freemem(data,sizeof(tunicodecharmapping)*datasize);
+              exit;
+           end;
+         readln(t,s);
+         while not(eof(t)) do
+           begin
+              if (s[1]='0') and (s[2]='x') then
+                begin
+                   flag:=umf_unused;
+                   scanpos:=3;
+                   hs:='$';
+                   while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
+                     begin
+                        hs:=hs+s[scanpos];
+                        inc(scanpos);
+                     end;
+                   val(hs,charpos,code);
+                   if code<>0 then
+                     begin
+                        freemem(data,sizeof(tunicodecharmapping)*datasize);
+                        close(t);
+                        exit;
+                     end;
+                   while not(s[scanpos] in ['0','#']) do
+                     inc(scanpos);
+                   if s[scanpos]='#' then
+                     begin
+                        { special char }
+                        unicodevalue:=$ffff;
+                        hs:=copy(s,scanpos,length(s)-scanpos+1);
+                        if hs='#DBCS LEAD BYTE' then
+                          flag:=umf_leadbyte;
+                     end
+                   else
+                     begin
+                        { C hex prefix }
+                        inc(scanpos,2);
+                        hs:='$';
+                        while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
+                          begin
+                             hs:=hs+s[scanpos];
+                             inc(scanpos);
+                          end;
+                        val(hs,unicodevalue,code);
+                        if code<>0 then
+                          begin
+                             freemem(data,sizeof(tunicodecharmapping)*datasize);
+                             close(t);
+                             exit;
+                          end;
+                        if charpos>datasize then
+                          begin
+                             { allocate 1024 bytes more because         }
+                             { if we need more than 256 entries it's    }
+                             { probably a mbcs with a lot of            }
+                             { entries                                  }
+                             datasize:=charpos+1024;
+                             reallocmem(data,sizeof(tunicodecharmapping)*datasize);
+                          end;
+                        flag:=umf_noinfo;
+                     end;
+                   data[charpos].flag:=flag;
+                   data[charpos].unicode:=unicodevalue;
+                   if charpos>lastchar then
+                     lastchar:=charpos;
+                end;
+              readln(t,s);
+           end;
+         close(t);
+         new(p);
+         p^.lastchar:=lastchar;
+         p^.cpname:=cpname;
+         p^.internalmap:=false;
+         p^.next:=nil;
+         p^.map:=data;
+         loadunicodemapping:=p;
+      end;
+
+    procedure registermapping(p : punicodemap);
+
+      begin
+         p^.next:=mappings;
+         mappings:=p;
+      end;
+
+  var
+     hp : punicodemap;
+
+initialization
+  mappings:=nil;
+finalization
+  while assigned(mappings) do
+    begin
+       hp:=mappings^.next;
+       if not(mappings^.internalmap) then
+         begin
+            freemem(mappings^.map);
+            dispose(mappings);
+         end;
+       mappings:=hp;
+    end;
+end.
+{
+  $Log$
+  Revision 1.1  2000-08-17 07:29:39  florian
+    + initial revision
+
+}

+ 83 - 0
utils/creumap.pp

@@ -0,0 +1,83 @@
+{
+    $Id$
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2000 by Florian Klaempfl
+
+    It creates pascal units from unicode mapping files
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+program creumap;
+
+  uses
+     charset;
+
+  procedure doerror;
+
+    begin
+       writeln('Usage: creumap <cpname>');
+       writeln('A mapping file called <cpname>.txt must be present');
+       halt(1);
+    end;
+
+  var
+     p : punicodemap;
+     i : longint;
+     t : text;
+
+begin
+   if paramcount<>1 then
+     doerror;
+   p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt');
+   if p=nil then
+     doerror;
+   assign(t,paramstr(1)+'.pp');
+   rewrite(t);
+   writeln(t,'{ This is an automatically created file, so don''t edit it }');
+   writeln(t,'unit ',p^.cpname,';');
+   writeln(t);
+   writeln(t,'  interface');
+   writeln(t);
+   writeln(t,'  implementation');
+   writeln(t);
+   writeln(t,'  uses');
+   writeln(t,'     charset;');
+   writeln(t);
+   writeln(t,'  const');
+   writeln(t,'     map : array[0..',p^.lastchar,'] of tunicodecharmapping = (');
+   for i:=0 to p^.lastchar do
+     begin
+        write(t,'       (unicode : ',p^.map[i].unicode,'; flag : ');
+        case p^.map[i].flag of
+           umf_noinfo : write(t,'umf_noinfo');
+           umf_leadbyte : write(t,'umf_leadbyte');
+           umf_undefined : write(t,'umf_undefined');
+           umf_unused : write(t,'umf_unused');
+        end;
+        write(t,')');
+        if i<>p^.lastchar then
+          writeln(t,',')
+        else
+          writeln(t);
+     end;
+   writeln(t,'     );');
+   writeln(t);
+   writeln(t,'     unicodemap : tunicodemap = (');
+   writeln(t,'       cpname : ''',p^.cpname,''';');
+   writeln(t,'       map : @map;');
+   writeln(t,'       lastchar : ',p^.lastchar,';');
+   writeln(t,'       next : nil;');
+   writeln(t,'       internalmap : true');
+   writeln(t,'     );');
+   writeln(t);
+   writeln(t,'  begin');
+   writeln(t,'     registermapping(@unicodemap)');
+   writeln(t,'  end.');
+   close(t);
+end.