toker.bmx 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. ' Copyright (c) 2013-2016 Bruce A Henderson
  2. '
  3. ' Based on the public domain Monkey "trans" by Mark Sibly
  4. '
  5. ' This software is provided 'as-is', without any express or implied
  6. ' warranty. In no event will the authors be held liable for any damages
  7. ' arising from the use of this software.
  8. '
  9. ' Permission is granted to anyone to use this software for any purpose,
  10. ' including commercial applications, and to alter it and redistribute it
  11. ' freely, subject to the following restrictions:
  12. '
  13. ' 1. The origin of this software must not be misrepresented; you must not
  14. ' claim that you wrote the original software. If you use this software
  15. ' in a product, an acknowledgment in the product documentation would be
  16. ' appreciated but is not required.
  17. '
  18. ' 2. Altered source versions must be plainly marked as such, and must not be
  19. ' misrepresented as being the original software.
  20. '
  21. ' 3. This notice may not be removed or altered from any source
  22. ' distribution.
  23. '
  24. SuperStrict
  25. Import "config.bmx"
  26. Import "type.bmx"
'toke_type
' Token categories. TToker.NextToke stores one of these in _tokeType for every
' token it produces; callers read it back through TToker.TokeType().
Const TOKE_EOF:Int=0 ' end of source reached; the token text is empty
Const TOKE_SPACE:Int=1 ' run of whitespace (not newline), or a swallowed ".." line continuation
Const TOKE_IDENT:Int=2 ' identifier: starts with "_" or a letter
Const TOKE_KEYWORD:Int=3 ' identifier whose lower-case form is listed in TToker._keywords
Const TOKE_INTLIT:Int=4 ' decimal, "%" binary or "$" hex integer literal
Const TOKE_FLOATLIT:Int=5 ' literal containing a fractional part and/or an "e" exponent
Const TOKE_STRINGLIT:Int=6 ' double-quoted string literal
Const TOKE_STRINGLITEX:Int=7 ' string literal that ran to the end of the source without a closing quote
Const TOKE_SYMBOL:Int=8 ' newline, multi-character symbol, or any other single character
Const TOKE_LINECOMMENT:Int=9 ' "'" comment that is not followed by a newline (otherwise it collapses to the newline symbol)
Const TOKE_LONGLIT:Int=10 ' decimal integer literal more than 10 characters long
Const TOKE_NATIVE:Int=11 ' line starting with "'!"
'***** Tokenizer *****
' Splits a source string into tokens, one per call to NextToke(), while
' tracking the current line number.
Type TToker
	' Reserved words, lower-case, each wrapped in ";" so that a whole-word test
	' can be done with _keywords.Contains( ";"+word+";" ).
	Const _keywords$=";"+ ..
	"strict;superstrict;"+ ..
	"public;private;"+ ..
	"short;int;float;double;long;string;object;ptr;var;varptr;mod;continue;exit;"+ ..
	"include;import;module;extern;framework;"+ ..
	"new;self;super;eachin;true;false;null;not;"+ ..
	"extends;abstract;select;case;default;"+ ..
	"const;local;global;field;method;function;type;"+ ..
	"and;or;shl;shr;sar;end;if;then;else;elseif;endif;while;wend;repeat;until;forever;"+ ..
	"for;to;step;next;return;"+ ..
	"alias;rem;endrem;throw;assert;try;catch;nodebug;incbin;"+ ..
	"endselect;endmethod;endfunction;endtype;endextern;endtry;endwhile;pi;release;defdata;readdata;restoredata;" + ..
	"interface;endinterface;implements;"+ ..
	"size_t;uint;ulong;struct;endstruct;" + ..
	"operator;"
	' NOTE(review): NextToke iterates over _symbols, but the declarations below
	' are commented out - presumably _symbols now comes from one of the imported
	' files; verify.
	'Global _symbols$[]=[ "..","[]",":=",":*",":/",":+",":-",":|",":&",":~~" ]
	'Global _symbols_map$[]=[ "..","[]",":=","*=","/=","+=","-=","|=","&=","~~=" ]
	Field _path$ ' path passed to Create(); returned by Path()
	Field _line:Int ' current line number; starts at 1 and is bumped for every newline consumed
	Field _lines:String[] ' the source split on "~n"; read by ParseRemStmt via _lines[_line - 1]
	Field _source$ ' the complete source text being tokenized
	Field _toke$ ' text of the current token
	Field _tokeLower$ ' lower-cased text of the most recent identifier/keyword token (only assigned for those)
	Field _tokeType:Int ' one of the TOKE_* constants
	Field _tokePos:Int ' index into _source of the next unread character
	Field _lookingForEndRem:Int ' True while ParseRemStmt is skipping a Rem block
	Field _preprocess:Int ' when True, ".." is never treated as a line continuation
  69. Method Create:TToker( path$,source$, preprocess:Int = False )
  70. _path=path
  71. _line=1
  72. _source=source
  73. _toke=""
  74. _tokeType=TOKE_EOF
  75. _tokePos=0
  76. _lines = source.split("~n")
  77. _preprocess = preprocess
  78. Return Self
  79. End Method
  80. Method rollback(pos:Int, toketype:Int = -1)
  81. _tokePos = pos
  82. If toketype >= 0
  83. _tokeType = toketype
  84. End If
  85. End Method
  86. Method Copy:TToker( toker:TToker )
  87. _path=toker._path
  88. _line=toker._line
  89. _source=toker._source
  90. _toke=toker._toke
  91. _tokeLower=toker._tokeLower
  92. _tokeType=toker._tokeType
  93. _tokePos=toker._tokePos
  94. _lines=toker._lines
  95. _lookingForEndRem=toker._lookingForEndRem
  96. _preprocess=toker._preprocess
  97. Return Self
  98. End Method
  99. Method Path$()
  100. Return _path
  101. End Method
  102. Method Line:Int()
  103. Return _line
  104. End Method
  105. Method NextToke$()
  106. If _tokePos=_source.Length
  107. _toke=""
  108. _tokeType=TOKE_EOF
  109. Return _toke
  110. EndIf
  111. Local char:Int=_source[_tokePos]
  112. Local str$=Chr( char )
  113. Local start:Int=_tokePos
  114. _tokePos:+1
  115. _toke=""
  116. If str="~n"
  117. _line:+1
  118. _tokeType=TOKE_SYMBOL
  119. Else If IsSpace( char )
  120. While _tokePos<_source.Length And IsSpace( TCHR() ) And TSTR()<>"~n"
  121. _tokePos:+1
  122. Wend
  123. _tokeType=TOKE_SPACE
  124. Else If str="_" Or IsAlpha( char )
  125. _tokeType=TOKE_IDENT
  126. While _tokePos<_source.Length
  127. Local char:Int=_source[_tokePos]
  128. If char<>Asc("_") And Not IsAlpha( char ) And Not IsDigit( char ) Exit
  129. _tokePos:+1
  130. Wend
  131. _toke=_source[start.._tokePos]
  132. _tokeLower = _toke.ToLower()
  133. If _keywords.Contains( ";"+_tokeLower+";" )
  134. _tokeType=TOKE_KEYWORD
  135. If Not _lookingForEndRem And _tokeLower = "rem" Then
  136. _lookingForEndRem = True
  137. ParseRemStmt()
  138. End If
  139. EndIf
  140. Else If IsDigit( char ) Or (str="." And IsDigit( TCHR() ))
  141. _tokeType=TOKE_INTLIT
  142. If str="." _tokeType=TOKE_FLOATLIT
  143. While IsDigit( TCHR() )
  144. _tokePos:+1
  145. Wend
  146. If _tokeType=TOKE_INTLIT And TSTR()="." And IsDigit( TCHR(1) )
  147. _tokeType=TOKE_FLOATLIT
  148. _tokePos:+2
  149. While IsDigit( TCHR() )
  150. _tokePos:+1
  151. Wend
  152. EndIf
  153. If TSTR().ToLower()="e"
  154. _tokeType=TOKE_FLOATLIT
  155. _tokePos:+1
  156. If TSTR()="+" Or TSTR()="-" _tokePos:+1
  157. While IsDigit( TCHR() )
  158. _tokePos:+1
  159. Wend
  160. EndIf
  161. If _tokeType=TOKE_INTLIT And _tokePos-start > 10 ' BaH Long
  162. _tokeType=TOKE_LONGLIT
  163. EndIf
  164. Else If str="%" And IsBinDigit( TCHR() )
  165. _tokeType=TOKE_INTLIT
  166. _tokePos:+1
  167. While IsBinDigit( TCHR() )
  168. _tokePos:+1
  169. Wend
  170. Else If str="$" And IsHexDigit( TCHR() )
  171. _tokeType=TOKE_INTLIT
  172. _tokePos:+1
  173. While IsHexDigit( TCHR() )
  174. _tokePos:+1
  175. Wend
  176. Else If str="~q"
  177. _tokeType=TOKE_STRINGLIT
  178. While TSTR() And TSTR()<>"~q"
  179. ' Strings can't cross line boundries
  180. If TSTR()="~n" Then
  181. _tokePos:-1
  182. Exit
  183. End If
  184. _tokePos:+1
  185. Wend
  186. If _tokePos<_source.Length _tokePos:+1 Else _tokeType=TOKE_STRINGLITEX
  187. Else If str="'"
  188. If TSTR()="!" Then
  189. _tokeType=TOKE_NATIVE
  190. While TSTR()
  191. If TSTR()="~n" Then
  192. _tokePos:-1
  193. Exit
  194. End If
  195. _tokePos:+1
  196. Wend
  197. Else
  198. _tokeType=TOKE_LINECOMMENT
  199. SkipToEOL()
  200. ' completely ignore line comments
  201. If TSTR()="~n" Then
  202. start = _tokePos
  203. If _tokePos<_source.Length
  204. _tokePos:+1
  205. End If
  206. _line:+1
  207. _tokeType=TOKE_SYMBOL
  208. End If
  209. End If
  210. Else If str="." And TSTR()="." Then
  211. Local pos:Int = _tokePos
  212. Local isValidTilEOL:Int = True
  213. _tokePos:+1
  214. While TSTR() And TSTR()<>"~n"
  215. If Not IsSpace(TCHR()) Then
  216. isValidTilEOL = False
  217. End If
  218. _tokePos:+1
  219. Wend
  220. If Not isValidTilEOL Or _preprocess Then
  221. _tokePos = pos + 1
  222. _tokeType=TOKE_SYMBOL
  223. Else
  224. start = _tokePos
  225. _toke = " "
  226. _tokePos:+1
  227. _line:+1
  228. _tokeType=TOKE_SPACE
  229. End If
  230. Else
  231. _tokeType=TOKE_SYMBOL
  232. For Local i:Int = 0 Until _symbols.length
  233. Local sym$=_symbols[i]
  234. If char<>sym[0] Continue
  235. 'do not try to read beyond source length
  236. If TCHR(sym.length) <= 0 Then Continue
  237. If _source[_tokePos-1.._tokePos+sym.length-1].ToLower()=sym
  238. ' if symbol has alpha, test for trailing alphanumeric char - in which case this is not a symbol
  239. If IsAlpha(sym[sym.length-1]) Then
  240. ' not at the end of the file?
  241. If _source.Length >= _tokePos+sym.length Then
  242. If IsAlpha(TCHR(sym.length-1)) Or IsDigit(TCHR(sym.length-1)) Then
  243. Exit
  244. End If
  245. End If
  246. End If
  247. _tokePos:+sym.length-1
  248. Exit
  249. EndIf
  250. Next
  251. EndIf
  252. If Not _toke _toke=_source[start.._tokePos]
  253. Return _toke
  254. End Method
  255. Method Toke$()
  256. Return _toke
  257. End Method
  258. Method TokeType:Int()
  259. Return _tokeType
  260. End Method
  261. Method SkipSpace:Int()
  262. Local count:Int
  263. While _tokeType=TOKE_SPACE
  264. NextToke
  265. count :+ 1
  266. Wend
  267. Return count
  268. End Method
' Skips the body of a Rem block. Called by NextToke right after it has read a
' "rem" keyword (with _lookingForEndRem already set, so the NextToke calls made
' here do not re-enter this method).
'
' The block is walked one line at a time: the text of the current line is
' taken from _lines and the loop stops at the first line that begins with
' "endrem" or "end rem" (leading whitespace and case ignored), or when the
' source runs out. The terminating line is then skipped as well, so on return
' the current token is whatever follows it.
Method ParseRemStmt()
	' move off the "rem" keyword itself
	NextToke()
	While _toke
		' NOTE(review): SkipEols and CParse compare against _tokeLower, which
		' NextToke only assigns for identifier/keyword tokens - so here it
		' appears to still hold "rem" and these calls look like no-ops.
		' Confirm before relying on or removing them.
		SkipEols()
		' text of the line the toker is currently on (_line is 1-based)
		Local line:String = _lines[_line - 1].Trim().toLower()
		If line.startswith("endrem") Then
			Exit
		End If
		If CParse( "end" )
			CParse "rem"
			Exit
		End If
		If line.startswith("end rem") Then
			Exit
		End If
		' not the end yet: discard the rest of this line and step over its newline
		SkipToEOL()
		NextToke
	Wend
	' discard the terminating line and read the token after it
	SkipToEOL()
	NextToke
	_lookingForEndRem = False
End Method
  291. Method CParse:Int( toke$ )
  292. If _tokeLower<>toke
  293. Return False
  294. EndIf
  295. NextToke
  296. Return True
  297. End Method
  298. Method SkipEols()
  299. While CParse( "~n" ) Or CParse(";")
  300. Wend
  301. End Method
  302. Method SkipToEOL()
  303. While TSTR() And TSTR()<>"~n"
  304. _tokePos:+1
  305. Wend
  306. End Method
  307. 'Private
  308. Method TCHR:Int( i:Int=0 )
  309. If _tokePos+i<_source.Length Return _source[_tokePos+i]
  310. End Method
  311. Method TSTR$( i:Int=0 )
  312. If _tokePos+i<_source.Length Return Chr( _source[_tokePos+i] )
  313. End Method
  314. End Type