toker.bmx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. ' Copyright (c) 2013-2023 Bruce A Henderson
  2. '
  3. ' Based on the public domain Monkey "trans" by Mark Sibly
  4. '
  5. ' This software is provided 'as-is', without any express or implied
  6. ' warranty. In no event will the authors be held liable for any damages
  7. ' arising from the use of this software.
  8. '
  9. ' Permission is granted to anyone to use this software for any purpose,
  10. ' including commercial applications, and to alter it and redistribute it
  11. ' freely, subject to the following restrictions:
  12. '
  13. ' 1. The origin of this software must not be misrepresented; you must not
  14. ' claim that you wrote the original software. If you use this software
  15. ' in a product, an acknowledgment in the product documentation would be
  16. ' appreciated but is not required.
  17. '
  18. ' 2. Altered source versions must be plainly marked as such, and must not be
  19. ' misrepresented as being the original software.
  20. '
  21. ' 3. This notice may not be removed or altered from any source
  22. ' distribution.
  23. '
  24. SuperStrict
  25. Import "config.bmx"
  26. Import "type.bmx"
  27. 'toke_type
  28. Const TOKE_EOF:Int=0
  29. Const TOKE_SPACE:Int=1
  30. Const TOKE_IDENT:Int=2
  31. Const TOKE_KEYWORD:Int=3
  32. Const TOKE_INTLIT:Int=4
  33. Const TOKE_FLOATLIT:Int=5
  34. Const TOKE_STRINGLIT:Int=6
  35. Const TOKE_STRINGLITEX:Int=7
  36. Const TOKE_SYMBOL:Int=8
  37. Const TOKE_LINECOMMENT:Int=9
  38. Const TOKE_LONGLIT:Int=10
  39. Const TOKE_NATIVE:Int=11
  40. Const TOKE_STRINGMULTI:Int=12
  41. Const TOKE_PRAGMA:Int=13
  42. '***** Tokenizer *****
  43. Type TToker
  44. Const __keywords$="strict,superstrict,public,private,protected,byte,short,int,float,double,long,string,object,ptr,var,varptr," + ..
  45. "mod,continue,exit,include,import,module,moduleinfo,extern,framework,new,self,super,eachin,true,false," + ..
  46. "null,not,extends,abstract,final,select,case,default,const,local,global,field,method,function,type," + ..
  47. "and,or,shl,shr,sar,end,if,then,else,elseif,endif,while,wend,repeat,until,forever,for,to,step,goto," + ..
  48. "next,return,alias,rem,endrem,throw,assert,try,catch,finally,nodebug,incbin,endselect,endmethod," + ..
  49. "endfunction,endtype,endextern,endtry,endwhile,pi,release,defdata,readdata,restoredata,interface," + ..
  50. "endinterface,implements,size_t,uint,ulong,struct,endstruct,operator,where,readonly,export,override," + ..
  51. "enum,endenum,stackalloc,inline,fieldoffset,staticarray,threadedglobal,longint,ulongint"
  52. Global _keywords:TMap
  53. Field _path$
  54. Field _line:Int
  55. Field _lines:String[]
  56. Field _source$
  57. Field _toke$
  58. Field _tokeLower$
  59. Field _tokeType:Int
  60. Field _tokePos:Int
  61. Field _lookingForEndRem:Int
  62. Field _preprocess:Int
  63. Field _lineOffset:Int
  64. Method Create:TToker( path$,source$, preprocess:Int = False, lineOffset:Int = 0 )
  65. _path=path
  66. _line=1
  67. _source=source
  68. _toke=""
  69. _tokeType=TOKE_EOF
  70. _tokePos=0
  71. _lines = source.split("~n")
  72. _preprocess = preprocess
  73. _lineOffset = lineOffset
  74. If Not _keywords Then
  75. initKeywords()
  76. End If
  77. Return Self
  78. End Method
  79. Method initKeywords()
  80. _keywords = New TMap
  81. For Local k:String = EachIn __keywords.Split(",")
  82. _keywords.Insert(k, "")
  83. Next
  84. End Method
  85. Method rollback(pos:Int, toketype:Int = -1)
  86. _tokePos = pos
  87. If toketype >= 0
  88. _tokeType = toketype
  89. End If
  90. End Method
  91. Method Copy:TToker( toker:TToker )
  92. _path=toker._path
  93. _line=toker._line
  94. _source=toker._source
  95. _toke=toker._toke
  96. _tokeLower=toker._tokeLower
  97. _tokeType=toker._tokeType
  98. _tokePos=toker._tokePos
  99. _lines=toker._lines
  100. _lookingForEndRem=toker._lookingForEndRem
  101. _preprocess=toker._preprocess
  102. _lineOffset=toker._lineOffset
  103. Return Self
  104. End Method
  105. Method Path$()
  106. Return _path
  107. End Method
  108. Method Line:Int()
  109. Return _line + _lineOffset
  110. End Method
  111. Method NextToke$()
  112. If _tokePos=_source.Length
  113. _toke=""
  114. _tokeType=TOKE_EOF
  115. Return _toke
  116. EndIf
  117. Local char:Int=_source[_tokePos]
  118. Local str$=Chr( char )
  119. Local start:Int=_tokePos
  120. _tokePos:+1
  121. _toke=""
  122. If str="~n"
  123. _line:+1
  124. _tokeType=TOKE_SYMBOL
  125. Else If IsSpace( char )
  126. While _tokePos<_source.Length And IsSpace( TCHR() ) And TSTR()<>"~n"
  127. _tokePos:+1
  128. Wend
  129. _tokeType=TOKE_SPACE
  130. Else If str="_" Or IsAlpha( char )
  131. _tokeType=TOKE_IDENT
  132. While _tokePos<_source.Length
  133. Local char:Int=_source[_tokePos]
  134. If char<>Asc("_") And Not IsAlpha( char ) And Not IsDigit( char ) Exit
  135. _tokePos:+1
  136. Wend
  137. _toke=_source[start.._tokePos]
  138. _tokeLower = _toke.ToLower()
  139. If _keywords.Contains( _tokeLower )
  140. _tokeType=TOKE_KEYWORD
  141. If Not _lookingForEndRem And _tokeLower = "rem" Then
  142. _lookingForEndRem = True
  143. ParseRemStmt()
  144. End If
  145. EndIf
  146. Else If IsDigit( char ) Or (str="." And IsDigit( TCHR() ))
  147. _tokeType=TOKE_INTLIT
  148. If str="." _tokeType=TOKE_FLOATLIT
  149. While IsDigit( TCHR() )
  150. _tokePos:+1
  151. Wend
  152. If _tokeType=TOKE_INTLIT And TSTR()="." And IsDigit( TCHR(1) )
  153. _tokeType=TOKE_FLOATLIT
  154. _tokePos:+2
  155. While IsDigit( TCHR() )
  156. _tokePos:+1
  157. Wend
  158. EndIf
  159. Local _tstr:String = TSTR()
  160. If _tstr="e" Or _tstr="E" Then
  161. _tokeType=TOKE_FLOATLIT
  162. _tokePos:+1
  163. _tstr = TSTR()
  164. If _tstr="+" Or _tstr="-" _tokePos:+1
  165. While IsDigit( TCHR() )
  166. _tokePos:+1
  167. Wend
  168. EndIf
  169. If _tokeType=TOKE_INTLIT And _tokePos-start > 10 ' BaH Long
  170. _tokeType=TOKE_LONGLIT
  171. EndIf
  172. Else If str="%" And IsBinDigit( TCHR() )
  173. _tokeType=TOKE_INTLIT
  174. _tokePos:+1
  175. While IsBinDigit( TCHR() )
  176. _tokePos:+1
  177. Wend
  178. Else If str="$" And IsHexDigit( TCHR() )
  179. _tokeType=TOKE_INTLIT
  180. _tokePos:+1
  181. While IsHexDigit( TCHR() )
  182. _tokePos:+1
  183. Wend
  184. Else If str="~q"
  185. Local isMulti:Int
  186. _tokeType=TOKE_STRINGLIT
  187. Local _tstr:String = TSTR()
  188. If _tstr = "~q" Then
  189. _tokePos:+1
  190. Local _tstr2:String = TSTR()
  191. If _tstr2 = "~q" Then
  192. isMulti = True
  193. Else
  194. _tokePos:-1
  195. End If
  196. End If
  197. If Not isMulti Then
  198. While _tstr And _tstr<>"~q"
  199. ' Strings can't cross line boundries
  200. If _tstr="~n" Then
  201. _tokePos:-1
  202. Exit
  203. End If
  204. _tokePos:+1
  205. _tstr = TSTR()
  206. Wend
  207. Else
  208. Local lineCount:Int
  209. Local count:Int
  210. _tokeType = TOKE_STRINGMULTI
  211. While _tstr
  212. _tokePos:+1
  213. _tstr = TSTR()
  214. If _tstr = "~n" Then
  215. lineCount:+1
  216. End If
  217. If _tstr = "~q" Then
  218. count :+ 1
  219. If count = 3 Then
  220. _line :+ lineCount - 1
  221. Exit
  222. End If
  223. Else
  224. count = 0
  225. End If
  226. Wend
  227. End If
  228. If _tokePos<_source.Length _tokePos:+1 Else _tokeType=TOKE_STRINGLITEX
  229. Else If str="'"
  230. Local _tstr:String = TSTR()
  231. If _tstr="!" Then
  232. _tokeType=TOKE_NATIVE
  233. While _tstr
  234. If _tstr="~n" Then
  235. _tokePos:-1
  236. Exit
  237. Else If _tstr="" Then
  238. Exit
  239. End If
  240. _tokePos:+1
  241. _tstr = TSTR()
  242. Wend
  243. Else
  244. _tokeType=TOKE_LINECOMMENT
  245. SkipToEOL()
  246. Local pos:Int = _tokePos
  247. If pos >= _source.Length
  248. pos = _source.Length - 1
  249. End If
  250. Local tk:String = _source[start + 1..pos].Trim()
  251. If tk.StartsWith("@bmk") Then
  252. _tokeType=TOKE_PRAGMA
  253. Else
  254. ' completely ignore line comments
  255. If TSTR()="~n" Then
  256. start = _tokePos
  257. If _tokePos<_source.Length
  258. _tokePos:+1
  259. End If
  260. _line:+1
  261. _tokeType=TOKE_SYMBOL
  262. End If
  263. End If
  264. End If
  265. Else If str="." And TSTR()="." Then
  266. Local pos:Int = _tokePos
  267. Local isValidTilEOL:Int = True
  268. _tokePos:+1
  269. Local _tstr:String = TSTR()
  270. While _tstr And _tstr<>"~n"
  271. If Not IsSpace(TCHR()) Then
  272. isValidTilEOL = False
  273. End If
  274. _tokePos:+1
  275. _tstr = TSTR()
  276. Wend
  277. If Not isValidTilEOL Or _preprocess Then
  278. _tokePos = pos + 1
  279. _tokeType=TOKE_SYMBOL
  280. Else
  281. start = _tokePos
  282. _toke = " "
  283. _tokePos:+1
  284. _line:+1
  285. _tokeType=TOKE_SPACE
  286. End If
  287. Else
  288. _tokeType=TOKE_SYMBOL
  289. For Local i:Int = 0 Until _symbols.length
  290. Local sym$=_symbols[i]
  291. If char<>sym[0] Continue
  292. 'do not try to read beyond source length
  293. If TCHR(sym.length) <= 0 Then Continue
  294. If _source[_tokePos-1.._tokePos+sym.length-1].ToLower()=sym
  295. ' if symbol has alpha, test for trailing alphanumeric char - in which case this is not a symbol
  296. If IsAlpha(sym[sym.length-1]) Then
  297. ' not at the end of the file?
  298. If _source.Length >= _tokePos+sym.length Then
  299. If IsAlpha(TCHR(sym.length-1)) Or IsDigit(TCHR(sym.length-1)) Then
  300. Exit
  301. End If
  302. End If
  303. End If
  304. _tokePos:+sym.length-1
  305. Exit
  306. EndIf
  307. Next
  308. EndIf
  309. If Not _toke _toke=_source[start.._tokePos]
  310. Return _toke
  311. End Method
  312. Method Toke$()
  313. Return _toke
  314. End Method
  315. Method TokeType:Int()
  316. Return _tokeType
  317. End Method
  318. Method SkipSpace:Int()
  319. Local count:Int
  320. While _tokeType=TOKE_SPACE
  321. NextToke
  322. count :+ 1
  323. Wend
  324. Return count
  325. End Method
  326. Method ParseRemStmt()
  327. NextToke()
  328. While _toke
  329. SkipEols()
  330. Local line:String = _lines[_line - 1].Trim().toLower()
  331. If line.startswith("endrem") Then
  332. Exit
  333. End If
  334. If CParse( "end" )
  335. CParse "rem"
  336. Exit
  337. End If
  338. If line.startswith("end rem") Then
  339. Exit
  340. End If
  341. SkipToEOL()
  342. NextToke
  343. Wend
  344. SkipToEOL()
  345. NextToke
  346. _lookingForEndRem = False
  347. End Method
  348. Method CParse:Int( toke$ )
  349. If _tokeLower<>toke
  350. Return False
  351. EndIf
  352. NextToke
  353. Return True
  354. End Method
  355. Method SkipEols()
  356. While CParse( "~n" ) Or CParse(";")
  357. Wend
  358. End Method
  359. Method SkipToEOL()
  360. While TSTR() And TSTR()<>"~n"
  361. _tokePos:+1
  362. Wend
  363. End Method
  364. 'Private
  365. Method TCHR:Int( i:Int=0 )
  366. If _lastIndex <> _tokePos+i Then
  367. _lastIndex = _tokePos+i
  368. If _lastIndex < _source.Length Then
  369. _lastTCHR = _source[_lastIndex]
  370. _lastTSTR = Chr( _lastTCHR )
  371. Else
  372. _lastTCHR = 0
  373. _lastTSTR = ""
  374. End If
  375. End If
  376. Return _lastTCHR
  377. End Method
  378. Field _lastIndex:Int = -1
  379. Field _lastTCHR:Int
  380. Method TSTR$( i:Int=0 )
  381. If _lastIndex <> _tokePos+i Then
  382. _lastIndex = _tokePos+i
  383. If _lastIndex < _source.Length Then
  384. _lastTCHR = _source[_lastIndex]
  385. _lastTSTR = Chr( _lastTCHR )
  386. Else
  387. _lastTCHR = 0
  388. _lastTSTR = ""
  389. End If
  390. End If
  391. Return _lastTSTR
  392. End Method
  393. Method Join:String(startLine:Int, endLine:Int, s:String)
  394. Local sb:TStringBuffer = New TStringBuffer
  395. For Local i:Int = startLine - 1 To endLine
  396. If i < _lines.Length Then
  397. sb.Append(_lines[i])
  398. sb.Append(s)
  399. End If
  400. Next
  401. Return sb.ToString()
  402. End Method
  403. Field _lastTSTR:String
  404. End Type