IdentifierUtils.cs 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Text;
  5. namespace GodotTools.ProjectEditor
  6. {
  /// <summary>
  /// Helpers that turn arbitrary text into strings usable as C# identifiers:
  /// characters that are not allowed in an identifier are dropped, and results
  /// that collide with a keyword are escaped with a leading <c>@</c>.
  /// </summary>
  public static class IdentifierUtils
  {
      /// <summary>
      /// Inputs longer than this many UTF-16 code units are truncated before being sanitized.
      /// NOTE(review): presumably mirrors the C# compiler's identifier length limit; confirm.
      /// </summary>
      private const int MaxIdentifierLength = 511;

      /// <summary>
      /// Replacement used when an identifier is empty or contains no valid characters at all.
      /// </summary>
      private const string EmptyIdentifierPlaceholder = "Empty";

      /// <summary>
      /// Sanitizes every dot-separated segment of <paramref name="qualifiedIdentifier"/> with
      /// <see cref="SanitizeIdentifier"/> and joins the results back together with dots.
      /// </summary>
      /// <param name="qualifiedIdentifier">Dot-separated name, e.g. a namespace.</param>
      /// <param name="allowEmptyIdentifiers">
      /// Forwarded as <c>allowEmpty</c> for each segment: when <c>true</c> an empty segment
      /// (as in <c>"a..b"</c>) becomes <c>"Empty"</c>, otherwise it causes an exception.
      /// </param>
      /// <returns>The sanitized qualified identifier.</returns>
      /// <exception cref="ArgumentException">
      /// <paramref name="qualifiedIdentifier"/> is null or empty, or a segment is empty while
      /// <paramref name="allowEmptyIdentifiers"/> is <c>false</c>.
      /// </exception>
      public static string SanitizeQualifiedIdentifier(string qualifiedIdentifier, bool allowEmptyIdentifiers)
      {
          if (string.IsNullOrEmpty(qualifiedIdentifier))
              throw new ArgumentException($"{nameof(qualifiedIdentifier)} cannot be empty", nameof(qualifiedIdentifier));

          string[] identifiers = qualifiedIdentifier.Split('.');

          for (int i = 0; i < identifiers.Length; i++)
          {
              identifiers[i] = SanitizeIdentifier(identifiers[i], allowEmpty: allowEmptyIdentifiers);
          }

          return string.Join(".", identifiers);
      }

      /// <summary>
      /// Copies the characters of <paramref name="source"/>, beginning at
      /// <paramref name="startIndex"/>, into <paramref name="outputBuilder"/> while skipping
      /// invalid identifier characters, including decimal digit numbers at the start of the identifier.
      /// </summary>
      /// <param name="source">Text to filter.</param>
      /// <param name="startIndex">Index in <paramref name="source"/> of the first character to examine.</param>
      /// <param name="outputBuilder">
      /// Receives the accepted characters. Whatever it already contains is treated as a prefix
      /// and does not count as the start of the identifier.
      /// </param>
      private static void SkipInvalidCharacters(string source, int startIndex, StringBuilder outputBuilder)
      {
          // Measure progress against the builder's own starting length instead of the source
          // index, so the "first character" test does not depend on the caller keeping the
          // builder length equal to startIndex.
          int initialLength = outputBuilder.Length;

          for (int i = startIndex; i < source.Length; i++)
          {
              char @char = source[i];

              switch (char.GetUnicodeCategory(@char))
              {
                  // Letter-like categories are accepted at any position.
                  case UnicodeCategory.UppercaseLetter:
                  case UnicodeCategory.LowercaseLetter:
                  case UnicodeCategory.TitlecaseLetter:
                  case UnicodeCategory.ModifierLetter:
                  case UnicodeCategory.LetterNumber:
                  case UnicodeCategory.OtherLetter:
                      outputBuilder.Append(@char);
                      break;

                  // These categories are only accepted once something has been emitted,
                  // with one exception: identifiers may start with underscore.
                  case UnicodeCategory.NonSpacingMark:
                  case UnicodeCategory.SpacingCombiningMark:
                  case UnicodeCategory.ConnectorPunctuation:
                  case UnicodeCategory.DecimalDigitNumber:
                      if (outputBuilder.Length > initialLength || @char == '_')
                          outputBuilder.Append(@char);
                      break;

                  // Every other category is dropped.
              }
          }
      }

      /// <summary>
      /// Produces a valid identifier from <paramref name="identifier"/>.
      /// </summary>
      /// <remarks>
      /// The input is truncated to 511 characters, a leading <c>@</c> is preserved, invalid
      /// characters are removed, an identifier left without any valid character becomes
      /// <c>"Empty"</c>, and a result that is a keyword is prefixed with <c>@</c>.
      /// </remarks>
      /// <param name="identifier">Text to sanitize.</param>
      /// <param name="allowEmpty">
      /// When <c>true</c>, a null or empty <paramref name="identifier"/> yields <c>"Empty"</c>
      /// instead of throwing.
      /// </param>
      /// <returns>The sanitized identifier; never null or empty.</returns>
      /// <exception cref="ArgumentException">
      /// <paramref name="identifier"/> is null or empty and <paramref name="allowEmpty"/> is <c>false</c>.
      /// </exception>
      public static string SanitizeIdentifier(string identifier, bool allowEmpty)
      {
          if (string.IsNullOrEmpty(identifier))
          {
              if (allowEmpty)
                  return EmptyIdentifierPlaceholder; // Default value for empty identifiers

              throw new ArgumentException($"{nameof(identifier)} cannot be empty if {nameof(allowEmpty)} is false", nameof(identifier));
          }

          if (identifier.Length > MaxIdentifierLength)
              identifier = identifier.Substring(0, MaxIdentifierLength);

          var identifierBuilder = new StringBuilder(identifier.Length);
          int startIndex = 0;

          // Keep an existing verbatim prefix and sanitize only what follows it.
          if (identifier[0] == '@')
          {
              identifierBuilder.Append('@');
              startIndex = 1;
          }

          int prefixLength = identifierBuilder.Length;

          SkipInvalidCharacters(identifier, startIndex, identifierBuilder);

          if (identifierBuilder.Length == prefixLength)
          {
              // All characters were invalid so now it's empty. Fill it with something.
              identifierBuilder.Append(EmptyIdentifierPlaceholder);
          }

          string sanitized = identifierBuilder.ToString();

          // The builder is never empty here, so indexing the first character is safe.
          if (sanitized[0] != '@' && IsKeyword(sanitized, anyDoubleUnderscore: true))
              sanitized = "@" + sanitized;

          return sanitized;
      }

      /// <summary>
      /// Tells whether <paramref name="value"/> has to be escaped before it can be used as an identifier.
      /// </summary>
      /// <param name="value">Candidate identifier, without a leading <c>@</c>.</param>
      /// <param name="anyDoubleUnderscore">
      /// When <c>true</c>, any value longer than two characters that begins with exactly two
      /// underscores is treated as a keyword; when <c>false</c>, only the known
      /// double-underscore keywords are.
      /// </param>
      /// <returns><c>true</c> if <paramref name="value"/> is considered a keyword.</returns>
      private static bool IsKeyword(string value, bool anyDoubleUnderscore)
      {
          // Identifiers that start with double underscore are meant to be used for reserved keywords.
          // Only existing keywords are enforced, but it may be useful to forbid any identifier
          // that begins with double underscore to prevent issues with future C# versions.
          if (anyDoubleUnderscore)
          {
              if (value.Length > 2 && value[0] == '_' && value[1] == '_' && value[2] != '_')
                  return true;
          }
          else
          {
              if (DoubleUnderscoreKeywords.Contains(value))
                  return true;
          }

          return Keywords.Contains(value);
      }

      /// <summary>
      /// Keywords that begin with a double underscore.
      /// </summary>
      private static readonly HashSet<string> DoubleUnderscoreKeywords = new HashSet<string>(StringComparer.Ordinal)
      {
          "__arglist",
          "__makeref",
          "__reftype",
          "__refvalue",
      };

      /// <summary>
      /// Keywords that get a leading <c>@</c> when produced as an identifier, ordered by length.
      /// </summary>
      private static readonly HashSet<string> Keywords = new HashSet<string>(StringComparer.Ordinal)
      {
          "as",
          "do",
          "if",
          "in",
          "is",
          "for",
          "int",
          "new",
          "out",
          "ref",
          "try",
          "base",
          "bool",
          "byte",
          "case",
          "char",
          "else",
          "enum",
          "goto",
          "lock",
          "long",
          "null",
          "this",
          "true",
          "uint",
          "void",
          "break",
          "catch",
          "class",
          "const",
          "event",
          "false",
          "fixed",
          "float",
          "sbyte",
          "short",
          "throw",
          "ulong",
          "using",
          "where",
          "while",
          "yield",
          "double",
          "extern",
          "object",
          "params",
          "public",
          "return",
          "sealed",
          "sizeof",
          "static",
          "string",
          "struct",
          "switch",
          "typeof",
          "unsafe",
          "ushort",
          "checked",
          "decimal",
          "default",
          "finally",
          "foreach",
          "partial",
          "private",
          "virtual",
          "abstract",
          "continue",
          "delegate",
          "explicit",
          "implicit",
          "internal",
          "operator",
          "override",
          "readonly",
          "volatile",
          "interface",
          "namespace",
          "protected",
          "unchecked",
          "stackalloc",
      };
  }
  185. }