Prechádzať zdrojové kódy

* Delphi-style multiline strings

Michaël Van Canneyt 3 mesiacov pred
rodič
commit
548a07c0da

+ 4 - 1
compiler/msg/errore.msg

@@ -487,8 +487,11 @@ scan_e_trimcount_out_of_range=02117_E_The value of MULTILINESTRINGTRIMLEFT canno
 % MULTILINESTRINGTRIMLEFT is stored in a "word" field, and thus is restricted to the range 0..65535.
 scan_e_illegal_directive=02118_E_Illegal compiler directive "$1"
 % You have specified a compiler directive that cannot (for one of several possible reasons) currently be used.
-scan_f_unterminated_multiline_string=02119_F_Unterminated multi-line string beginning at line $1, column $2.
+scan_f_unterminated_multiline_string=02119_F_Unterminated multi-line string starting at line $1, column $2.
 % The file contains an "unbalanced" number of multi-line string denoting backtick characters.
+scan_e_improperly_indented_multiline_string=02120_E_Incorrectly indented multi-line string (need $1 whitespace chars) starting at line $2, column $3.
+% The multi-line string does not have the proper indentation, every line
+% must have at least tjhen number of spaces as in the last line.
 %\end{description}
 #
 # Parser

+ 21 - 0
compiler/scandir.pas

@@ -1030,6 +1030,26 @@ unit scandir;
           Message(scan_e_unknown_lineending_type);
       end;
 
+    procedure dir_textblock;
+      var
+        s : string;
+      begin
+        if not (m_delphi in current_settings.modeswitches) then
+          Message1(scan_e_illegal_directive,'TEXTBLOCK');
+        current_scanner.skipspace;
+        s:=current_scanner.readid;
+        if (s='CR') then
+          current_settings.lineendingtype:=le_cr
+        else if (s='CRLF') then
+          current_settings.lineendingtype:=le_crlf
+        else if (s='LF') then
+          current_settings.lineendingtype:=le_lf
+        else if (s='NATIVE') then
+          current_settings.lineendingtype:=le_platform
+        else
+          Message(scan_e_unknown_lineending_type);
+      end;
+
     procedure dir_multilinestringtrimleft;
       var
         count : longint;
@@ -2232,6 +2252,7 @@ unit scandir;
         AddDirective('MMX',directive_all, @dir_mmx);
         AddDirective('MODE',directive_all, @dir_mode);
         AddDirective('MODESWITCH',directive_all, @dir_modeswitch);
+        AddDirective('TEXTBLOCK',directive_all, @dir_textblock);
         AddDirective('MULTILINESTRINGLINEENDING',directive_all, @dir_multilinestringlineending);
         AddDirective('MULTILINESTRINGTRIMLEFT',directive_all, @dir_multilinestringtrimleft);
         AddDirective('NAMESPACE',directive_all, @dir_namespace);

+ 669 - 418
compiler/scanner.pas

@@ -114,6 +114,8 @@ interface
        private
          procedure do_gettokenpos(out tokenpos: longint; out filepos: tfileposinfo);
          procedure cachenexttokenpos;
+         procedure postprocessmultiline(len, quote_pos, quote_count: integer);
+         procedure postprocessutf8multiline(len, quote_pos, quote_count: integer);
          procedure setnexttoken;
          procedure savetokenpos;
          procedure restoretokenpos;
@@ -258,6 +260,7 @@ interface
           function  readquotedstring:string;
           function  readlongcomment(include_special_char: boolean = false):RawByteString;
           function  readlongquotedstring:RawByteString;
+          function readstringconstant: boolean;
           function  readstate:char;
           function  readoptionalstate(fallback:char):char;
           function  readstatedefault:char;
@@ -5413,7 +5416,669 @@ type
         current_commentstyle:=comment_none;
       end;
 
+    procedure tscannerfile.postprocessutf8multiline(len,quote_pos,quote_count : integer);
+    var
+      malformed : boolean;
+      start, i,stripcol,col,newlen : integer;
+      crlf : boolean;
+      tmp : tcompilerwidestring;
+      c : tcompilerwidechar;
+      file_pos : tfileposinfo;
+
+    begin
+      stripcol:=quote_pos;
+      malformed:=false;
+      newlen:=0;
+      col:=0;
+      start:=1;
+      initwidestring(tmp);
+      { Strip initial cr/lf }
+      Case current_settings.lineendingtype of
+        le_cr,le_lf : inc(start);
+        le_crlf : inc(start,2);
+        le_source :
+          begin
+          inc(start);
+          if (getcharwidestring(patternw,1)=13) and (getcharwidestring(patternw,start)=10) then
+            inc(start);
+          end;
+        le_platform : inc(start,length(target_info.newline));
+      end;
+      { we don't need the last added quotes }
+      dec(len,quote_count-1);
+      for I:=Start to len do
+        begin
+        c:=getcharwidestring(patternw,i);
+        inc(col);
+        if (col>stripcol) or (c=10) or (c=13) then
+          begin
+          inc(newlen);
+          concatwidestringchar(patternw,c);
+          end
+        else
+          begin
+          // if less spaces than in the last line, report error
+          if not (c in [9,32,11]) then
+            begin
+            if not malformed then
+              begin
+              malformed:=true;
+              message3(scan_e_improperly_indented_multiline_string,
+                      tostr(stripcol),
+                      tostr(multiline_start_line),
+                      tostr(multiline_start_column));
+              end;
+            end;
+          end;
+        if (c=10) or (c=13) then
+          col:=0;
+        end;
+      // remove last CR/LF
+      c:=getcharwidestring(tmp,newlen);
+      if (c=10) or (c=13) then
+        begin
+        Case current_settings.lineendingtype of
+          le_cr,le_lf : dec(newlen);
+          le_crlf : dec(newlen,2);
+          le_platform : dec(newlen,length(target_info.newline));
+          le_source :
+            begin
+            crlf:=getcharwidestring(tmp,newlen)=10;
+            dec(newlen);
+            if crlf and (newLen>0) and (getcharwidestring(tmp,newlen)=13) then
+              dec(newlen);
+            end;
+        end;
+        end;
+      tmp.len:=newLen;
+      donewidestring(patternw);
+      patternw:=tmp;
+    end;
 
+    procedure tscannerfile.postprocessmultiline(len,quote_pos,quote_count : integer);
+
+    var
+      malformed : boolean;
+      start, i,stripcol,col,newlen : integer;
+      crlf : boolean;
+      tmp : ansistring;
+      c : ansichar;
+      file_pos : tfileposinfo;
+
+    begin
+      stripcol:=quote_pos;
+      malformed:=false;
+      newlen:=0;
+      setlength(tmp,len-quote_count+1);
+      col:=0;
+      start:=1;
+      { Strip initial cr/lf }
+      Case current_settings.lineendingtype of
+        le_cr,le_lf : inc(start);
+        le_crlf : inc(start,2);
+        le_platform : inc(start,length(target_info.newline));
+        le_source :
+          begin
+          inc(start);
+          if (cstringPattern[1]=#13) and (cstringpattern[start]=#10) then
+            inc(start);
+          end;
+      end;
+      { we don't need the last added quotes }
+      dec(len,quote_count-1);
+      for I:=Start to len do
+        begin
+        c:=cstringpattern[i];
+        inc(col);
+        if (col>stripcol) or (c in [#10,#13]) then
+          begin
+          inc(newlen);
+          tmp[newlen]:=c;
+          end
+        else
+          begin
+          // if less spaces than in the last line, report error
+          if not (c in [#9,#32,#11]) then
+            begin
+            if not malformed then
+              begin
+              malformed:=true;
+              message3(scan_e_improperly_indented_multiline_string,
+                      tostr(stripcol),
+                      tostr(multiline_start_line),
+                      tostr(multiline_start_column));
+              end;
+            end;
+          end;
+        if c in [#10,#13] then
+          col:=0;
+        end;
+      // remove last CR/LF
+      if tmp[newlen] in [#10,#13] then
+        begin
+        Case current_settings.lineendingtype of
+          le_cr,le_lf : dec(newlen);
+          le_crlf : dec(newlen,2);
+          le_platform : dec(newlen,length(target_info.newline));
+          le_source :
+              begin
+              crlf:=tmp[newlen]=#10;
+              dec(newlen);
+              if crlf and (tmp[newlen]=#13) then
+                Dec(newlen);
+              end;
+
+        end;
+        end;
+      SetLength(tmp,newlen);
+      cstringpattern:=tmp;
+    end;
+
+    function tscannerfile.readstringconstant : boolean;
+
+    type
+       tQuoteStyle = (qsNone,qsBacktick,qsMultiQuote);
+
+    var
+      trimcount,m,code,len,quote_count,init_quote_count,whitespace_count,quote_col : integer;
+      style : tQuoteStyle;
+      iswidestring : boolean;
+      asciinr : string[33];
+      last_c : char;
+      whitespace_only, had_newline, first_multiline,backtick : boolean;
+      d : cardinal;
+      w : word;
+
+    label
+      quote_label;
+    begin
+      last_c :=#0;
+      trimcount:=0;
+      quote_col:=0;
+      whitespace_count:=0;
+      had_newline:=false;
+      had_multiline_string:=in_multiline_string;
+      backtick:=(c='`');
+      if backtick then
+        style:=qsBacktick
+      else
+        style:=qsNone;
+      whitespace_only:=true;
+      in_multiline_string:=backtick;
+      quote_count:=0;
+      whitespace_only:=true;
+      if in_multiline_string then
+        begin
+          if not (m_multiline_strings in current_settings.modeswitches) then
+            Illegal_Char(c)
+          else
+            begin
+              multiline_start_line:=current_filepos.line;
+              multiline_start_column:=current_filepos.column;
+            end;
+        end;
+      len:=0;
+      cstringpattern:='';
+      iswidestring:=false;
+      if c='^' then
+       begin
+         readchar;
+         c:=upcase(c);
+         if (block_type in [bt_type,bt_const_type,bt_var_type]) or
+            (lasttoken=_ID) or (lasttoken=_NIL) or (lasttoken=_OPERATOR) or
+            (lasttoken=_RKLAMMER) or (lasttoken=_RECKKLAMMER) or (lasttoken=_CARET) then
+          begin
+            token:=_CARET;
+            exit(true);
+          end
+         else
+          begin
+            inc(len);
+            setlength(cstringpattern,256);
+            if c<#64 then
+              cstringpattern[len]:=chr(ord(c)+64)
+            else
+              cstringpattern[len]:=chr(ord(c)-64);
+            readchar;
+          end;
+       end;
+      repeat
+        case c of
+          '#' :
+            begin
+              readchar; { read # }
+              case c of
+                '$':
+                  begin
+                    readchar; { read leading $ }
+                    asciinr:='$';
+                    while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=7) do
+                      begin
+                        asciinr:=asciinr+c;
+                        readchar;
+                      end;
+                  end;
+                '&':
+                  begin
+                    readchar; { read leading $ }
+                    asciinr:='&';
+                    while (upcase(c) in ['0'..'7']) and (length(asciinr)<=8) do
+                      begin
+                        asciinr:=asciinr+c;
+                        readchar;
+                      end;
+                  end;
+                '%':
+                  begin
+                    readchar; { read leading $ }
+                    asciinr:='%';
+                    while (upcase(c) in ['0','1']) and (length(asciinr)<=22) do
+                      begin
+                        asciinr:=asciinr+c;
+                        readchar;
+                      end;
+                  end;
+                else
+                  begin
+                    asciinr:='';
+                    while (c in ['0'..'9']) and (length(asciinr)<=8) do
+                      begin
+                        asciinr:=asciinr+c;
+                        readchar;
+                      end;
+                  end;
+              end;
+              val(asciinr,m,code);
+              if (asciinr='') or (code<>0) then
+                Message(scan_e_illegal_char_const)
+              else if (m<0) or (m>255) or (length(asciinr)>3) then
+                begin
+                   if (m>=0) and (m<=$10FFFF) then
+                     begin
+                       if not iswidestring then
+                        begin
+                          if len>0 then
+                            ascii2unicode(@cstringpattern[1],len,current_settings.sourcecodepage,patternw)
+                          else
+                            ascii2unicode(nil,len,current_settings.sourcecodepage,patternw);
+                          iswidestring:=true;
+                          len:=0;
+                        end;
+                       if m<=$FFFF then
+                         concatwidestringchar(patternw,tcompilerwidechar(m))
+                       else
+                         begin
+                           { split into surrogate pair }
+                           dec(m,$10000);
+                           concatwidestringchar(patternw,tcompilerwidechar((m shr 10) + $D800));
+                           concatwidestringchar(patternw,tcompilerwidechar((m and $3FF) + $DC00));
+                         end;
+                     end
+                   else
+                     Message(scan_e_illegal_char_const)
+                end
+              else if iswidestring then
+                concatwidestringchar(patternw,asciichar2unicode(char(m)))
+              else
+                begin
+                  if len>=length(cstringpattern) then
+                    setlength(cstringpattern,length(cstringpattern)+256);
+                  inc(len);
+                  cstringpattern[len]:=chr(m);
+                end;
+            end;
+          '''','`' :
+            begin
+              inc(quote_count);
+              had_multiline_string:=in_multiline_string;
+              if style<>qsMultiQuote then
+                begin
+                in_multiline_string:=(c='`');
+                if in_multiline_string then
+                  backtick:=true
+                else
+                  style:=qsNone;
+                first_multiline:=in_multiline_string and (last_c in [#0,#32,#61]);
+                end;
+              repeat
+                readchar;
+                quote_label:
+                  case c of
+                    #26 :
+                      end_of_file;
+                    #32,#9,#11 :
+                      begin
+                      inc(whitespace_count);
+                      if (had_newline or first_multiline) and backtick and
+                         (current_settings.whitespacetrimauto or
+                         (current_settings.whitespacetrimcount>0)) then
+                        begin
+                          if current_settings.whitespacetrimauto then
+                            trimcount:=multiline_start_column
+                          else
+                            trimcount:=current_settings.whitespacetrimcount;
+                          while (c in [#32,#9,#11]) and (trimcount>0) do
+                            begin
+                              readchar;
+                              dec(trimcount);
+                            end;
+                          had_newline:=false;
+                          first_multiline:=false;
+                          goto quote_label;
+                        end;
+                      end;
+                    #10,#13 :
+                      begin
+                      whitespace_only:=true;
+                      whitespace_count:=0;
+                      if not in_multiline_string then
+                        begin
+                          if had_multiline_string then
+                            Message2(scan_f_unterminated_multiline_string,
+                                     tostr(multiline_start_line),
+                                     tostr(multiline_start_column))
+                          else if (not backtick)
+                                   and ((quote_count>2) and ((quote_count mod 2)=1))
+                                   and (m_multiline_strings in current_settings.modeswitches) then
+                            begin
+                            style:=qsMultiQuote;
+                            init_quote_count:=quote_count;
+                            multiline_start_line:=current_filepos.line;
+                            multiline_start_column:=current_filepos.column;
+                            in_multiline_string:=true;
+                            had_multiline_string:=true;
+                            trimcount:=0;
+                            len:=0;
+                            if c=#13 then
+                              begin
+                              readchar;
+                              if c<>#10 then
+                                goto quote_label;
+                              end;
+                            end
+                          else
+                            Message(scan_f_string_exceeds_line);
+                        end;
+                      end;
+                    '''' :
+                      begin
+                      inc(quote_count);
+                      if not in_multiline_string then
+                        begin
+                          readchar;
+                          if c='''' then
+                            inc(quote_count)
+                          else
+                            break;
+                        end
+                      else if not backtick then
+                        if whitespace_only and (quote_count=init_quote_count) then
+                          begin
+                          in_multiline_string:=false;
+                          quote_col:=whitespace_count;
+                          readchar;
+                          break;
+                          end;
+                      end;
+                    '`' :
+                      if in_multiline_string then
+                        begin
+                          readchar;
+                          if c<>'`' then
+                           break;
+                        whitespace_only:=false;
+                        end;
+                    else
+                      whitespace_only:=false;
+                      quote_count:=0;
+                    end;
+                first_multiline:=false;
+                { interpret as utf-8 string? }
+                if (ord(c)>=$80) and (current_settings.sourcecodepage=CP_UTF8) then
+                  begin
+                    { convert existing string to an utf-8 string }
+                    if not iswidestring then
+                      begin
+                        if len>0 then
+                          ascii2unicode(@cstringpattern[1],len,current_settings.sourcecodepage,patternw)
+                        else
+                          ascii2unicode(nil,len,current_settings.sourcecodepage,patternw);
+                        iswidestring:=true;
+                        len:=0;
+                      end;
+                    { four chars }
+                    if (ord(c) and $f0)=$f0 then
+                      begin
+                        { this always represents a surrogate pair, so
+                          read as 32-bit value and then split into
+                          the corresponding pair of two wchars }
+                        d:=ord(c) and $f;
+                        readchar;
+                        if (ord(c) and $c0)<>$80 then
+                          message(scan_e_utf8_malformed);
+                        d:=(d shl 6) or (ord(c) and $3f);
+                        readchar;
+                        if (ord(c) and $c0)<>$80 then
+                          message(scan_e_utf8_malformed);
+                        d:=(d shl 6) or (ord(c) and $3f);
+                        readchar;
+                        if (ord(c) and $c0)<>$80 then
+                          message(scan_e_utf8_malformed);
+                        d:=(d shl 6) or (ord(c) and $3f);
+                        if d<$10000 then
+                          message(scan_e_utf8_malformed);
+                        d:=d-$10000;
+                        { high surrogate }
+                        w:=$d800+(d shr 10);
+                        concatwidestringchar(patternw,w);
+                        { low surrogate }
+                        w:=$dc00+(d and $3ff);
+                        concatwidestringchar(patternw,w);
+                      end
+                    { three chars }
+                    else if (ord(c) and $e0)=$e0 then
+                      begin
+                        w:=ord(c) and $f;
+                        readchar;
+                        if (ord(c) and $c0)<>$80 then
+                          message(scan_e_utf8_malformed);
+                        w:=(w shl 6) or (ord(c) and $3f);
+                        readchar;
+                        if (ord(c) and $c0)<>$80 then
+                          message(scan_e_utf8_malformed);
+                        w:=(w shl 6) or (ord(c) and $3f);
+                        concatwidestringchar(patternw,w);
+                      end
+                    { two chars }
+                    else if (ord(c) and $c0)<>0 then
+                      begin
+                        w:=ord(c) and $1f;
+                        readchar;
+                        if (ord(c) and $c0)<>$80 then
+                          message(scan_e_utf8_malformed);
+                        w:=(w shl 6) or (ord(c) and $3f);
+                        concatwidestringchar(patternw,w);
+                      end
+                    { illegal }
+                    else if (ord(c) and $80)<>0 then
+                      message(scan_e_utf8_malformed)
+                    else
+                      concatwidestringchar(patternw,tcompilerwidechar(c))
+                  end
+                else if iswidestring then
+                  begin
+                    if in_multiline_string and (c in [#10,#13]) and (not ((c=#10) and (last_c=#13))) then
+                      begin
+                        if current_settings.sourcecodepage=CP_UTF8 then
+                          begin
+                            case current_settings.lineendingtype of
+                              le_cr :
+                                concatwidestringchar(patternw,ord(#13));
+                              le_crlf :
+                                begin
+                                  concatwidestringchar(patternw,ord(#13));
+                                  concatwidestringchar(patternw,ord(#10));
+                                end;
+                              le_lf :
+                                concatwidestringchar(patternw,ord(#10));
+                              le_platform :
+                                begin
+                                  if target_info.newline=#13 then
+                                    concatwidestringchar(patternw,ord(#13))
+                                  else if target_info.newline=#13#10 then
+                                    begin
+                                      concatwidestringchar(patternw,ord(#13));
+                                      concatwidestringchar(patternw,ord(#10));
+                                    end
+                                  else if target_info.newline=#10 then
+                                    concatwidestringchar(patternw,ord(#10));
+                                end;
+                              le_source :
+                                concatwidestringchar(patternw,ord(c));
+                            end;
+                          end
+                        else
+                          case current_settings.lineendingtype of
+                            le_cr :
+                              concatwidestringchar(patternw,asciichar2unicode(#13));
+                            le_crlf :
+                              begin
+                                concatwidestringchar(patternw,asciichar2unicode(#13));
+                                concatwidestringchar(patternw,asciichar2unicode(#10));
+                              end;
+                            le_lf :
+                              concatwidestringchar(patternw,asciichar2unicode(#10));
+                            le_platform :
+                              begin
+                                if target_info.newline=#13 then
+                                  concatwidestringchar(patternw,asciichar2unicode(#13))
+                                else if target_info.newline=#13#10 then
+                                  begin
+                                    concatwidestringchar(patternw,asciichar2unicode(#13));
+                                    concatwidestringchar(patternw,asciichar2unicode(#10));
+                                  end
+                                else if target_info.newline=#10 then
+                                  concatwidestringchar(patternw,asciichar2unicode(#10));
+                              end;
+                            le_source :
+                              concatwidestringchar(patternw,asciichar2unicode(c));
+                          end;
+                        had_newline:=true;
+                        inc(line_no);
+                      end
+                    else if not (in_multiline_string and (c in [#10,#13])) then
+                      begin
+                        if current_settings.sourcecodepage=CP_UTF8 then
+                          concatwidestringchar(patternw,ord(c))
+                        else
+                          concatwidestringchar(patternw,asciichar2unicode(c));
+                      end;
+                  end
+                else
+                  begin
+                     if in_multiline_string and (c in [#10,#13]) and (not ((c=#10) and (last_c=#13))) then
+                       begin
+                         if len>=length(cstringpattern) then
+                           setlength(cstringpattern,length(cstringpattern)+256);
+                         inc(len);
+                         case current_settings.lineendingtype of
+                           le_cr :
+                             cstringpattern[len]:=#13;
+                           le_crlf :
+                             begin
+                               cstringpattern[len]:=#13;
+                               inc(len);
+                               cstringpattern[len]:=#10;
+                             end;
+                           le_lf :
+                             cstringpattern[len]:=#10;
+                           le_platform :
+                             begin
+                               if target_info.newline=#13 then
+                                 cstringpattern[len]:=#13
+                               else if target_info.newline=#13#10 then
+                                 begin
+                                   cstringpattern[len]:=#13;
+                                   inc(len);
+                                   cstringpattern[len]:=#10;
+                                 end
+                               else if target_info.newline=#10 then
+                                 cstringpattern[len]:=#10;
+                             end;
+                           le_source :
+                             cstringpattern[len]:=c;
+                         end;
+                         had_newline:=true;
+                         inc(line_no);
+                       end
+                     else if not (in_multiline_string and (c in [#10,#13])) then
+                       begin
+                         if len>=length(cstringpattern) then
+                           setlength(cstringpattern,length(cstringpattern)+256);
+                         inc(len);
+                         cstringpattern[len]:=c;
+                       end;
+                  end;
+              last_c:=c;
+              until false;
+            end;
+          '^' :
+            begin
+              readchar;
+              c:=upcase(c);
+              if c<#64 then
+               c:=chr(ord(c)+64)
+              else
+               c:=chr(ord(c)-64);
+
+              if iswidestring then
+                concatwidestringchar(patternw,asciichar2unicode(c))
+              else
+                begin
+                  if len>=length(cstringpattern) then
+                    setlength(cstringpattern,length(cstringpattern)+256);
+                   inc(len);
+                   cstringpattern[len]:=c;
+                end;
+
+              readchar;
+            end;
+          else
+           break;
+        end;
+      last_c:=c;
+      until false;
+
+      { strings with length 1 become const chars }
+      if iswidestring then
+        begin
+          if had_multiline_string and not backtick then
+            begin
+            postprocessutf8multiline(len,quote_col,init_quote_count);
+            end;
+          if patternw.len=1 then
+            token:=_CWCHAR
+          else
+            token:=_CWSTRING;
+        end
+      else
+        begin
+          if had_multiline_string and not backtick then
+            begin
+            postprocessmultiline(len,quote_col,init_quote_count);
+            end
+          else
+            setlength(cstringpattern,len);
+          if length(cstringpattern)=1 then
+            begin
+              token:=_CCHAR;
+              pattern:=cstringpattern;
+            end
+          else
+            token:=_CSTRING;
+        end;
+      exit(true);
+    end;
 
 {****************************************************************************
                                Token Scanner
@@ -5428,17 +6093,14 @@ type
         w : word;
         m       : longint;
         mac     : tmacro;
-        asciinr : string[33];
-        iswidestring , firstdigitread: boolean;
+        firstdigitread: boolean;
         had_newline,first_multiline : boolean;
         trimcount : word;
-        last_c : char;
-label
-         quote_label, exit_label;
+       label
+         exit_label;
       begin
         had_newline:=false;
         first_multiline:=false;
-        last_c:=#0;
         flushpendingswitchesstate;
 
         { record tokens? }
@@ -5886,420 +6548,9 @@ label
 
              '''','#','^','`' :
                begin
-                 had_multiline_string:=in_multiline_string;
-                 in_multiline_string:=(c='`');
-                 if in_multiline_string then
-                   begin
-                     if not (m_multiline_strings in current_settings.modeswitches) then
-                       Illegal_Char(c)
-                     else
-                       begin
-                         multiline_start_line:=current_filepos.line;
-                         multiline_start_column:=current_filepos.column;
-                       end;
-                   end;
-                 len:=0;
-                 cstringpattern:='';
-                 iswidestring:=false;
-                 if c='^' then
-                  begin
-                    readchar;
-                    c:=upcase(c);
-                    if (block_type in [bt_type,bt_const_type,bt_var_type]) or
-                       (lasttoken=_ID) or (lasttoken=_NIL) or (lasttoken=_OPERATOR) or
-                       (lasttoken=_RKLAMMER) or (lasttoken=_RECKKLAMMER) or (lasttoken=_CARET) then
-                     begin
-                       token:=_CARET;
-                       goto exit_label;
-                     end
-                    else
-                     begin
-                       inc(len);
-                       setlength(cstringpattern,256);
-                       if c<#64 then
-                         cstringpattern[len]:=chr(ord(c)+64)
-                       else
-                         cstringpattern[len]:=chr(ord(c)-64);
-                       readchar;
-                     end;
-                  end;
-                 repeat
-                   case c of
-                     '#' :
-                       begin
-                         readchar; { read # }
-                         case c of
-                           '$':
-                             begin
-                               readchar; { read leading $ }
-                               asciinr:='$';
-                               while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=7) do
-                                 begin
-                                   asciinr:=asciinr+c;
-                                   readchar;
-                                 end;
-                             end;
-                           '&':
-                             begin
-                               readchar; { read leading $ }
-                               asciinr:='&';
-                               while (upcase(c) in ['0'..'7']) and (length(asciinr)<=8) do
-                                 begin
-                                   asciinr:=asciinr+c;
-                                   readchar;
-                                 end;
-                             end;
-                           '%':
-                             begin
-                               readchar; { read leading $ }
-                               asciinr:='%';
-                               while (upcase(c) in ['0','1']) and (length(asciinr)<=22) do
-                                 begin
-                                   asciinr:=asciinr+c;
-                                   readchar;
-                                 end;
-                             end;
-                           else
-                             begin
-                               asciinr:='';
-                               while (c in ['0'..'9']) and (length(asciinr)<=8) do
-                                 begin
-                                   asciinr:=asciinr+c;
-                                   readchar;
-                                 end;
-                             end;
-                         end;
-                         val(asciinr,m,code);
-                         if (asciinr='') or (code<>0) then
-                           Message(scan_e_illegal_char_const)
-                         else if (m<0) or (m>255) or (length(asciinr)>3) then
-                           begin
-                              if (m>=0) and (m<=$10FFFF) then
-                                begin
-                                  if not iswidestring then
-                                   begin
-                                     if len>0 then
-                                       ascii2unicode(@cstringpattern[1],len,current_settings.sourcecodepage,patternw)
-                                     else
-                                       ascii2unicode(nil,len,current_settings.sourcecodepage,patternw);
-                                     iswidestring:=true;
-                                     len:=0;
-                                   end;
-                                  if m<=$FFFF then
-                                    concatwidestringchar(patternw,tcompilerwidechar(m))
-                                  else
-                                    begin
-                                      { split into surrogate pair }
-                                      dec(m,$10000);
-                                      concatwidestringchar(patternw,tcompilerwidechar((m shr 10) + $D800));
-                                      concatwidestringchar(patternw,tcompilerwidechar((m and $3FF) + $DC00));
-                                    end;
-                                end
-                              else
-                                Message(scan_e_illegal_char_const)
-                           end
-                         else if iswidestring then
-                           concatwidestringchar(patternw,asciichar2unicode(char(m)))
-                         else
-                           begin
-                             if len>=length(cstringpattern) then
-                               setlength(cstringpattern,length(cstringpattern)+256);
-                             inc(len);
-                             cstringpattern[len]:=chr(m);
-                           end;
-                       end;
-                     '''','`' :
-                       begin
-                         had_multiline_string:=in_multiline_string;
-                         in_multiline_string:=(c='`');
-                         first_multiline:=in_multiline_string and (last_c in [#0,#32,#61]);
-                         repeat
-                           readchar;
-                           quote_label:
-                             case c of
-                               #26 :
-                                 end_of_file;
-                               #32,#9,#11 :
-                                 if (had_newline or first_multiline) and
-                                    (current_settings.whitespacetrimauto or
-                                    (current_settings.whitespacetrimcount>0)) then
-                                   begin
-                                     if current_settings.whitespacetrimauto then
-                                       trimcount:=multiline_start_column
-                                     else
-                                       trimcount:=current_settings.whitespacetrimcount;
-                                     while (c in [#32,#9,#11]) and (trimcount>0) do
-                                       begin
-                                         readchar;
-                                         dec(trimcount);
-                                       end;
-                                     had_newline:=false;
-                                     first_multiline:=false;
-                                     goto quote_label;
-                                   end;
-                               #10,#13 :
-                                 if not in_multiline_string then
-                                   begin
-                                     if had_multiline_string then
-                                       Message2(scan_f_unterminated_multiline_string,
-                                                tostr(multiline_start_line),
-                                                tostr(multiline_start_column))
-                                     else
-                                       Message(scan_f_string_exceeds_line);
-                                   end;
-                               '''' :
-                                 if not in_multiline_string then
-                                   begin
-                                     readchar;
-                                     if c<>'''' then
-                                      break;
-                                   end;
-                               '`' :
-                                 if in_multiline_string then
-                                   begin
-                                     readchar;
-                                     if c<>'`' then
-                                      break;
-                                   end;
-                             end;
-                           first_multiline:=false;
-                           { interpret as utf-8 string? }
-                           if (ord(c)>=$80) and (current_settings.sourcecodepage=CP_UTF8) then
-                             begin
-                               { convert existing string to an utf-8 string }
-                               if not iswidestring then
-                                 begin
-                                   if len>0 then
-                                     ascii2unicode(@cstringpattern[1],len,current_settings.sourcecodepage,patternw)
-                                   else
-                                     ascii2unicode(nil,len,current_settings.sourcecodepage,patternw);
-                                   iswidestring:=true;
-                                   len:=0;
-                                 end;
-                               { four chars }
-                               if (ord(c) and $f0)=$f0 then
-                                 begin
-                                   { this always represents a surrogate pair, so
-                                     read as 32-bit value and then split into
-                                     the corresponding pair of two wchars }
-                                   d:=ord(c) and $f;
-                                   readchar;
-                                   if (ord(c) and $c0)<>$80 then
-                                     message(scan_e_utf8_malformed);
-                                   d:=(d shl 6) or (ord(c) and $3f);
-                                   readchar;
-                                   if (ord(c) and $c0)<>$80 then
-                                     message(scan_e_utf8_malformed);
-                                   d:=(d shl 6) or (ord(c) and $3f);
-                                   readchar;
-                                   if (ord(c) and $c0)<>$80 then
-                                     message(scan_e_utf8_malformed);
-                                   d:=(d shl 6) or (ord(c) and $3f);
-                                   if d<$10000 then
-                                     message(scan_e_utf8_malformed);
-                                   d:=d-$10000;
-                                   { high surrogate }
-                                   w:=$d800+(d shr 10);
-                                   concatwidestringchar(patternw,w);
-                                   { low surrogate }
-                                   w:=$dc00+(d and $3ff);
-                                   concatwidestringchar(patternw,w);
-                                 end
-                               { three chars }
-                               else if (ord(c) and $e0)=$e0 then
-                                 begin
-                                   w:=ord(c) and $f;
-                                   readchar;
-                                   if (ord(c) and $c0)<>$80 then
-                                     message(scan_e_utf8_malformed);
-                                   w:=(w shl 6) or (ord(c) and $3f);
-                                   readchar;
-                                   if (ord(c) and $c0)<>$80 then
-                                     message(scan_e_utf8_malformed);
-                                   w:=(w shl 6) or (ord(c) and $3f);
-                                   concatwidestringchar(patternw,w);
-                                 end
-                               { two chars }
-                               else if (ord(c) and $c0)<>0 then
-                                 begin
-                                   w:=ord(c) and $1f;
-                                   readchar;
-                                   if (ord(c) and $c0)<>$80 then
-                                     message(scan_e_utf8_malformed);
-                                   w:=(w shl 6) or (ord(c) and $3f);
-                                   concatwidestringchar(patternw,w);
-                                 end
-                               { illegal }
-                               else if (ord(c) and $80)<>0 then
-                                 message(scan_e_utf8_malformed)
-                               else
-                                 concatwidestringchar(patternw,tcompilerwidechar(c))
-                             end
-                           else if iswidestring then
-                             begin
-                               if in_multiline_string and (c in [#10,#13]) and (not ((c=#10) and (last_c=#13))) then
-                                 begin
-                                   if current_settings.sourcecodepage=CP_UTF8 then
-                                     begin
-                                       case current_settings.lineendingtype of
-                                         le_cr :
-                                           concatwidestringchar(patternw,ord(#13));
-                                         le_crlf :
-                                           begin
-                                             concatwidestringchar(patternw,ord(#13));
-                                             concatwidestringchar(patternw,ord(#10));
-                                           end;
-                                         le_lf :
-                                           concatwidestringchar(patternw,ord(#10));
-                                         le_platform :
-                                           begin
-                                             if target_info.newline=#13 then
-                                               concatwidestringchar(patternw,ord(#13))
-                                             else if target_info.newline=#13#10 then
-                                               begin
-                                                 concatwidestringchar(patternw,ord(#13));
-                                                 concatwidestringchar(patternw,ord(#10));
-                                               end
-                                             else if target_info.newline=#10 then
-                                               concatwidestringchar(patternw,ord(#10));
-                                           end;
-                                         le_source :
-                                           concatwidestringchar(patternw,ord(c));
-                                       end;
-                                     end
-                                   else
-                                     case current_settings.lineendingtype of
-                                       le_cr :
-                                         concatwidestringchar(patternw,asciichar2unicode(#13));
-                                       le_crlf :
-                                         begin
-                                           concatwidestringchar(patternw,asciichar2unicode(#13));
-                                           concatwidestringchar(patternw,asciichar2unicode(#10));
-                                         end;
-                                       le_lf :
-                                         concatwidestringchar(patternw,asciichar2unicode(#10));
-                                       le_platform :
-                                         begin
-                                           if target_info.newline=#13 then
-                                             concatwidestringchar(patternw,asciichar2unicode(#13))
-                                           else if target_info.newline=#13#10 then
-                                             begin
-                                               concatwidestringchar(patternw,asciichar2unicode(#13));
-                                               concatwidestringchar(patternw,asciichar2unicode(#10));
-                                             end
-                                           else if target_info.newline=#10 then
-                                             concatwidestringchar(patternw,asciichar2unicode(#10));
-                                         end;
-                                       le_source :
-                                         concatwidestringchar(patternw,asciichar2unicode(c));
-                                     end;
-                                   had_newline:=true;
-                                   inc(line_no);
-                                 end
-                               else if not (in_multiline_string and (c in [#10,#13])) then
-                                 begin
-                                   if current_settings.sourcecodepage=CP_UTF8 then
-                                     concatwidestringchar(patternw,ord(c))
-                                   else
-                                     concatwidestringchar(patternw,asciichar2unicode(c));
-                                 end;
-                             end
-                           else
-                             begin
-                                if in_multiline_string and (c in [#10,#13]) and (not ((c=#10) and (last_c=#13))) then
-                                  begin
-                                    if len>=length(cstringpattern) then
-                                      setlength(cstringpattern,length(cstringpattern)+256);
-                                    inc(len);
-                                    case current_settings.lineendingtype of
-                                      le_cr :
-                                        cstringpattern[len]:=#13;
-                                      le_crlf :
-                                        begin
-                                          cstringpattern[len]:=#13;
-                                          inc(len);
-                                          cstringpattern[len]:=#10;
-                                        end;
-                                      le_lf :
-                                        cstringpattern[len]:=#10;
-                                      le_platform :
-                                        begin
-                                          if target_info.newline=#13 then
-                                            cstringpattern[len]:=#13
-                                          else if target_info.newline=#13#10 then
-                                            begin
-                                              cstringpattern[len]:=#13;
-                                              inc(len);
-                                              cstringpattern[len]:=#10;
-                                            end
-                                          else if target_info.newline=#10 then
-                                            cstringpattern[len]:=#10;
-                                        end;
-                                      le_source :
-                                        cstringpattern[len]:=c;
-                                    end;
-                                    had_newline:=true;
-                                    inc(line_no);
-                                  end
-                                else if not (in_multiline_string and (c in [#10,#13])) then
-                                  begin
-                                    if len>=length(cstringpattern) then
-                                      setlength(cstringpattern,length(cstringpattern)+256);
-                                    inc(len);
-                                    cstringpattern[len]:=c;
-                                  end;
-                             end;
-                         last_c:=c;
-                         until false;
-                       end;
-                     '^' :
-                       begin
-                         readchar;
-                         c:=upcase(c);
-                         if c<#64 then
-                          c:=chr(ord(c)+64)
-                         else
-                          c:=chr(ord(c)-64);
-
-                         if iswidestring then
-                           concatwidestringchar(patternw,asciichar2unicode(c))
-                         else
-                           begin
-                             if len>=length(cstringpattern) then
-                               setlength(cstringpattern,length(cstringpattern)+256);
-                              inc(len);
-                              cstringpattern[len]:=c;
-                           end;
-
-                         readchar;
-                       end;
-                     else
-                      break;
-                   end;
-                 last_c:=c;
-                 until false;
-                 { strings with length 1 become const chars }
-                 if iswidestring then
-                   begin
-                     if patternw.len=1 then
-                       token:=_CWCHAR
-                     else
-                       token:=_CWSTRING;
-                   end
-                 else
-                   begin
-                     setlength(cstringpattern,len);
-                     if length(cstringpattern)=1 then
-                       begin
-                         token:=_CCHAR;
-                         pattern:=cstringpattern;
-                       end
-                     else
-                       token:=_CSTRING;
-                   end;
+                 readstringconstant;
                  goto exit_label;
                end;
-
              '>' :
                begin
                  readchar;

+ 19 - 0
tests/test/tmultilinestring29.pp

@@ -0,0 +1,19 @@
+// Multiline string
+{$mode objfpc}
+const 
+  s = '''
+  this
+  is
+  a multiline
+  string
+  ''';
+  
+  stest = 'this'+sLineBreak+'is'+slinebreak+'a multiline'+sLineBreak+'string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.

+ 20 - 0
tests/test/tmultilinestring30.pp

@@ -0,0 +1,20 @@
+// Multiline string
+// There is a space after the word multiline, test that it is not stripped!
+{$mode objfpc}
+const 
+  s = '''
+  this
+  is
+  a multiline 
+  string
+  ''';
+  
+  stest = 'this'+sLineBreak+'is'+slinebreak+'a multiline '+sLineBreak+'string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.

+ 19 - 0
tests/test/tmultilinestring31.pp

@@ -0,0 +1,19 @@
+// Multiline string, 5 quotes, less quotes are preserved in the string
+{$mode objfpc}
+const 
+  s = '''''
+  this
+  is
+  a '''multiline'''
+  string
+  ''''';
+  
+  stest = 'this'+sLineBreak+'is'+slinebreak+'a ''''''multiline'''''''+sLineBreak+'string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.

+ 20 - 0
tests/test/tmultilinestring32.pp

@@ -0,0 +1,20 @@
+{ %fail }
+// Multiline string but not properly indented
+{$mode objfpc}
+const 
+  s = '''
+  this
+ is
+  a multiline
+  string
+  ''';
+  
+  stest = 'this'+sLineBreak+'is'+slinebreak+'a multiline'+sLineBreak+'string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.

+ 18 - 0
tests/test/tmultilinestring33.pp

@@ -0,0 +1,18 @@
+// Multiline string
+{$mode objfpc}
+const 
+  s = '''
+  this is
+  a UTF8 àéèùá
+  multiline string
+  ''';
+  
+  stest = 'this is'+slinebreak+'a UTF8 àéèùá'+sLineBreak+'multiline string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.

+ 21 - 0
tests/test/tmultilinestring34.pp

@@ -0,0 +1,21 @@
+// Multiline string
+{$mode delphi}
+{$TEXTBLOCK CRLF}
+const 
+  s = '''
+  this
+  is
+  a multiline
+  string
+  ''';
+  
+  CRLF = #13#10;
+  stest = 'this'+CRLF+'is'+CRLF+'a multiline'+CRLF+'string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.

+ 20 - 0
tests/test/tmultilinestring35.pp

@@ -0,0 +1,20 @@
+// Multiline string, 
+{$mode delphi}
+{$TEXTBLOCK CRLF}
+const 
+  s = '''
+  this is
+  a UTF8 àéèùá
+  multiline string
+  ''';
+  
+  CRLF = #13#10;  
+  stest = 'this is'+CRLF+'a UTF8 àéèùá'+CRLF+'multiline string';
+  
+begin
+  if not (s=stest) then
+    begin
+    writeln('Wrong string, expected "',stest,'" but got: "',s,'"');
+    halt(1);
+    end;
+end.