8 ani în urmă · e8f7c9dfdd
--- a/.gitattributes
+++ b/.gitattributes
@@ -11524,6 +11524,7 @@ tests/test/cg/tcalvar7.pp svneol=native#text/plain
 
				 tests/test/cg/tcalvar8.pp svneol=native#text/plain
			
 
				 tests/test/cg/tcase.pp svneol=native#text/plain
			
 
				 tests/test/cg/tcase2.pp svneol=native#text/plain
			
 
				+tests/test/cg/tcase3.pp svneol=native#text/pascal
			
 
				 tests/test/cg/tclacla1.pp svneol=native#text/plain
			
 
				 tests/test/cg/tclasize.pp svneol=native#text/plain
			
 
				 tests/test/cg/tclatype.pp svneol=native#text/plain
			
--- a/tests/test/cg/tcase3.pp
+++ b/tests/test/cg/tcase3.pp
@@ -0,0 +1,233 @@
 
				+{$codepage utf8}
			
 
				+{$mode objfpc} {$h+} {$coperators on}
			
 
				+uses
			
 
				+	SysUtils, DateUtils;
			
 
				+
			
 
				+const
			
 
				+	Sample = widestring('Examples' + LineEnding +
			
 
				+		'Appointments: Аппойнтменты (Appojntmenty)[2]' + LineEnding +
			
 
				+		'Iced Coffee: Аисд кофе (Aisd kofe)[2]' + LineEnding +
			
 
				+		'Would you like that sliced or in one piece?: Вам наслайсовать или писом? (Vam naslajsovatj ili pisom?)[4]' + LineEnding +
			
 
				+		'Driving upstate on the highways: Драйвуем в апстейт по хайвеям (Drajvujem v apstejt po hajwejam)[5]' + LineEnding +
			
 
				+		'Sliced Cheese: Слайсающий чиз (Slajsajuçij čiz)[5]' + LineEnding +
			
 
				+		'To merge branches: Смержить бранчи (Smeržitj branči)[2]' + LineEnding +
			
 
				+		'To manage: Сменеджить (Smenedžitj)' + LineEnding +
			
 
				+		'I sent you message with attached request: Я засендил тебе месседж с приаттаченым реквестом (Ya zasendil tebe messedž s priattachenim rekvestom)');
			
 
				+	Iterations = 880000;
			
 
				+
			
 
				+type
			
 
				+	CodepointFlag = (Letter, Vowel, Consonant, Digit, Whitespace, Newline, Punctuation, Cyrillic, Latin, Diacritic);
			
 
				+	CodepointClassification = set of CodepointFlag;
			
 
				+
			
 
				+	function Classify_Case(const cp: widechar): CodepointClassification;
			
 
				+	begin
			
 
				+		case cp of
			
 
				+			' ', #$9 {tab}, #$a0 {nbsp}, '　' {ideographic space}: result := [Whitespace];
			
 
				+			#$a {lf}, #$d {cr}: result := [Whitespace, Newline];
			
 
				+			'0' .. '9': result := [Digit];
			
 
				+			'a', 'e', 'i', 'o', 'u', 'y', 'A', 'E', 'I', 'O', 'U', 'Y': result := [Letter, Latin, Vowel];
			
 
				+			'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z',
			
 
				+			'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z': result := [Letter, Latin, Consonant];
			
 
				+			'.', '!', ',', '?', ':', ';', '-', '—': result := [Punctuation];
			
 
				+			'А', 'Е', 'И', 'О', 'У', 'Ы', 'Э', 'Ю', 'Я', 'а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я', 'Ё', 'ё': result := [Letter, Cyrillic, Vowel];
			
 
				+			'б', 'в', 'г', 'д', 'ж', 'з', 'й', 'к', 'л', 'м', 'н', 'п', 'р', 'с', 'т', 'ф', 'х', 'ц', 'ч', 'ш', 'щ',
			
 
				+			'Б', 'В', 'Г', 'Д', 'Ж', 'З', 'Й', 'К', 'Л', 'М', 'Н', 'П', 'Р', 'С', 'Т', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ': result := [Letter, Cyrillic, Consonant];
			
 
				+			'ъ', 'ь', 'Ъ', 'Ь': result := [Letter, Cyrillic];
			
 
				+			#$300 {first diacritic} .. #$36f {last diacritic}: result := [Diacritic];
			
 
				+			else result := [];
			
 
				+		end;
			
 
				+	end;
			
 
				+
			
 
				+	// I have built this tree from sorted starts of subsequences:
			
 
				+	//  (1)       9   tab
			
 
				+	//  (2)       A   lf
			
 
				+	//  (3)       D   cr
			
 
				+	//  (4)      20   space
			
 
				+	//  (5)      21   !
			
 
				+	//  (6)      2C   ,
			
 
				+	//  (7)      2D   -
			
 
				+	//  (8)      2E   .
			
 
				+	//  (9)   30–39   0-9
			
 
				+	// (10)      3A   :
			
 
				+	// (11)      3B   ;
			
 
				+	// (12)      3F   ?
			
 
				+	// (13)   41-5a   A-Z
			
 
				+	// (14)   61-7a   a-z
			
 
				+	// (15)      A0   nbsp
			
 
				+	// (16) 300–36f   diacritics
			
 
				+	// (17)     401   Ё
			
 
				+	// (18) 410–44f   А-я
			
 
				+	// (19)     451   ё
			
 
				+	// (20)    2014   — (em dash)
			
 
				+	// (21)    3000   ideographic space
			
 
				+	function Classify_Tree(const cp: widechar): CodepointClassification;
			
 
				+	begin
			
 
				+		if cp < #$3F then // (1) ~ (11)
			
 
				+			if cp < #$2D then // (1) ~ (6) — small set without subsequences, resort to predictor-friendly :) case..of
			
 
				+				case cp of
			
 
				+					' ', #$9 {tab}: exit([Whitespace]);
			
 
				+					#$a {lf}, #$d {cr}: exit([Whitespace, Newline]);
			
 
				+					'!', ',': exit([Punctuation]);
			
 
				+				end
			
 
				+			else // (7) ~ (11)
			
 
				+				if cp < #$3A then // (7) ~ (9)
			
 
				+					if cp < #$30 then // (7) ~ (8)
			
 
				+						case cp of
			
 
				+							'-', '.': exit([Punctuation]);
			
 
				+						end
			
 
				+					else // (9)
			
 
				+						case cp of
			
 
				+							'0' .. '9': exit([Digit]);
			
 
				+						end
			
 
				+				else // (10) ~ (11)
			
 
				+					case cp of
			
 
				+						':', ';': exit([Punctuation]);
			
 
				+					end
			
 
				+		else // (12) ~ (21)
			
 
				+			if cp < #$401 then // (12) ~ (16)
			
 
				+				if cp < #$A0 then // (12) ~ (14)
			
 
				+					if cp < #$61 then // (12) ~ (13)
			
 
				+						if cp < #$41 then // (12)
			
 
				+							case cp of
			
 
				+								'?': exit([Punctuation]);
			
 
				+							end
			
 
				+						else // (13)
			
 
				+							case cp of
			
 
				+								'A', 'E', 'I', 'O', 'U', 'Y': exit([Letter, Latin, Vowel]);
			
 
				+								'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z': exit([Letter, Latin, Consonant]);
			
 
				+							end
			
 
				+					else //(14)
			
 
				+						case cp of
			
 
				+							'a', 'e', 'i', 'o', 'u', 'y': exit([Letter, Latin, Vowel]);
			
 
				+							'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z': exit([Letter, Latin, Consonant]);
			
 
				+						end
			
 
				+				else // (15) ~ (16)
			
 
				+					if cp < #$300 then // (15)
			
 
				+						case cp of
			
 
				+							#$a0 {nbsp}: exit([Whitespace]);
			
 
				+						end
			
 
				+					else // (16)
			
 
				+						case cp of
			
 
				+							#$300 {first diacritic} .. #$36f {last diacritic}: exit([Diacritic]);
			
 
				+						end
			
 
				+			else // (17) ~ (21)
			
 
				+				if cp < #$2014 then // (17) ~ (19)
			
 
				+					if cp < #$451 then // (17) ~ (18)
			
 
				+						if cp < #$410 then // (17)
			
 
				+							case cp of
			
 
				+								'Ё': exit([Letter, Cyrillic, Vowel]);
			
 
				+							end
			
 
				+						else // (18)
			
 
				+							case cp of
			
 
				+								'А', 'Е', 'И', 'О', 'У', 'Ы', 'Э', 'Ю', 'Я', 'а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я': exit([Letter, Cyrillic, Vowel]);
			
 
				+								'б', 'в', 'г', 'д', 'ж', 'з', 'й', 'к', 'л', 'м', 'н', 'п', 'р', 'с', 'т', 'ф', 'х', 'ц', 'ч', 'ш', 'щ',
			
 
				+								'Б', 'В', 'Г', 'Д', 'Ж', 'З', 'Й', 'К', 'Л', 'М', 'Н', 'П', 'Р', 'С', 'Т', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ': exit([Letter, Cyrillic, Consonant]);
			
 
				+								'ъ', 'ь', 'Ъ', 'Ь': exit([Letter, Cyrillic]);
			
 
				+							end
			
 
				+					else // (19)
			
 
				+						case cp of
			
 
				+							'ё': exit([Letter, Cyrillic, Vowel]);
			
 
				+						end
			
 
				+				else // (20) ~ (21)
			
 
				+					case cp of
			
 
				+						'—': exit([Punctuation]);
			
 
				+						'　': exit([Whitespace]);
			
 
				+					end;
			
 
				+
			
 
				+		// 'else'
			
 
				+		result := [];
			
 
				+	end;
			
 
				+
			
 
				+	procedure Test;
			
 
				+		function ToString(const cls: CodepointClassification): string;
			
 
				+		var
			
 
				+			f: CodepointFlag;
			
 
				+		begin
			
 
				+			result := '';
			
 
				+			for f in CodepointFlag do
			
 
				+				if f in cls then
			
 
				+				begin
			
 
				+					if result <> '' then result += ', ';
			
 
				+					WriteStr(result, f);
			
 
				+				end;
			
 
				+			result := '[' + result + ']';
			
 
				+		end;
			
 
				+	var
			
 
				+		cp: widechar;
			
 
				+	begin
			
 
				+		write('Sanity check... ');
			
 
				+		for cp := #0 to #$3001 do
			
 
				+			if Classify_Case(cp) <> Classify_Tree(cp) then
			
 
				+			begin
			
 
				+				writeln(
			
 
				+					'Failed. Classify_Case isn''t equivalent to Classify_Tree.', LineEnding,
			
 
				+					'Symbol code: $', HexStr(cardinal(cp), bitsizeof(widechar) div 4), LineEnding,
			
 
				+					'Classify_Case: ', ToString(Classify_Case(cp)), LineEnding,
			
 
				+					'Classify_Tree: ', ToString(Classify_Tree(cp)), LineEnding);
			
 
				+				readln;
			
 
				+				halt(1);
			
 
				+			end;
			
 
				+
			
 
				+		writeln('Passed. Classify_Case and Classify_Tree are equivalent.',
			
 
				+			LineEnding);
			
 
				+	end;
			
 
				+
			
 
				+	procedure Benchmark;
			
 
				+	var
			
 
				+		caseTime, treeTime: TDateTime;
			
 
				+		vow, lat: cardinal;
			
 
				+		iteration: cardinal;
			
 
				+		i: SizeInt;
			
 
				+
			
 
				+		procedure ResetWork(out vow, lat: cardinal);
			
 
				+		begin
			
 
				+			vow := 0; lat := 0;
			
 
				+		end;
			
 
				+
			
 
				+		procedure Note(const cls: CodepointClassification);
			
 
				+		begin
			
 
				+			if Vowel in cls then inc(vow);
			
 
				+			if Latin in cls then inc(lat);
			
 
				+		end;
			
 
				+
			
 
				+	begin
			
 
				+		write('Benchmarking Classify_Tree... ');
			
 
				+		ResetWork(vow, lat);
			
 
				+		treeTime := Now;
			
 
				+		for iteration := 1 to Iterations do
			
 
				+		begin
			
 
				+			for i := 1 to length(Sample) do
			
 
				+				Note(Classify_Tree(Sample[i]));
			
 
				+			if iteration < Iterations then ResetWork(vow, lat);
			
 
				+		end;
			
 
				+		treeTime := SecondSpan(Now, treeTime);
			
 
				+		writeln(treeTime:0:1, ' s (', treeTime/Iterations*1e6:0:1, ' mcs/it), vow. ', vow, ', lat. ', lat, '.');
			
 
				+
			
 
				+		write('Benchmarking Classify_Case... ');
			
 
				+		ResetWork(vow, lat);
			
 
				+		caseTime := Now;
			
 
				+		for iteration := 1 to Iterations do
			
 
				+		begin
			
 
				+			for i := 1 to length(Sample) do
			
 
				+				Note(Classify_Case(Sample[i]));
			
 
				+			if iteration < Iterations then ResetWork(vow, lat);
			
 
				+		end;
			
 
				+		caseTime := SecondSpan(Now, caseTime);
			
 
				+		writeln(caseTime:0:1, ' s (', caseTime/Iterations*1e6:0:1, ' mcs/it), vow. ', vow, ', lat. ', lat, '.');
			
 
				+
			
 
				+		if (treeTime < 0.5) or (caseTime < 0.5) then
			
 
				+			writeln('Could not deduce anything useful.')
			
 
				+		else
			
 
				+			if (treeTime <= 1.05*caseTime) and (caseTime <= 1.05*treeTime) then
			
 
				+				writeln('There was no observable difference.')
			
 
				+			else
			
 
				+				if treeTime > caseTime then
			
 
				+					writeln('Classify_Case was faster by ', (treeTime / caseTime - 1)*100:0:0, '% (', treeTime/caseTime:0:1, 'x).')
			
 
				+				else
			
 
				+					writeln('Classify_Tree was faster by ', (caseTime / treeTime - 1)*100:0:0, '% (', caseTime/treeTime:0:1, 'x).')
			
 
				+	end;
			
 
				+
			
 
				+begin
			
 
				+	Test;
			
 
				+//        Benchmark;
			
 
				+end.