toker.bmx 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. ' Copyright (c) 2013-2020 Bruce A Henderson
  2. '
  3. ' Based on the public domain Monkey "trans" by Mark Sibly
  4. '
  5. ' This software is provided 'as-is', without any express or implied
  6. ' warranty. In no event will the authors be held liable for any damages
  7. ' arising from the use of this software.
  8. '
  9. ' Permission is granted to anyone to use this software for any purpose,
  10. ' including commercial applications, and to alter it and redistribute it
  11. ' freely, subject to the following restrictions:
  12. '
  13. ' 1. The origin of this software must not be misrepresented; you must not
  14. ' claim that you wrote the original software. If you use this software
  15. ' in a product, an acknowledgment in the product documentation would be
  16. ' appreciated but is not required.
  17. '
  18. ' 2. Altered source versions must be plainly marked as such, and must not be
  19. ' misrepresented as being the original software.
  20. '
  21. ' 3. This notice may not be removed or altered from any source
  22. ' distribution.
  23. '
  24. SuperStrict
  25. Import "config.bmx"
  26. Import "type.bmx"
  ' Token type codes stored in TToker._tokeType and returned by TToker.TokeType().
  Const TOKE_EOF:Int          = 0   ' end of source reached; token text is empty
  Const TOKE_SPACE:Int        = 1   ' run of whitespace, or a folded ".." line continuation
  Const TOKE_IDENT:Int        = 2   ' identifier that is not in the keyword list
  Const TOKE_KEYWORD:Int      = 3   ' identifier found in the keyword list
  Const TOKE_INTLIT:Int       = 4   ' decimal, %binary or $hex integer literal
  Const TOKE_FLOATLIT:Int     = 5   ' literal with a fraction and/or an exponent
  Const TOKE_STRINGLIT:Int    = 6   ' single-line quoted string
  Const TOKE_STRINGLITEX:Int  = 7   ' string literal that ran into the end of the source
  Const TOKE_SYMBOL:Int       = 8   ' newline, operator or any other single character
  Const TOKE_LINECOMMENT:Int  = 9   ' ' comment that is not followed by a newline
  Const TOKE_LONGLIT:Int      = 10  ' decimal integer literal longer than 10 characters
  Const TOKE_NATIVE:Int       = 11  ' line introduced by '!
  Const TOKE_STRINGMULTI:Int  = 12  ' string opened with three double quotes
  '***** Tokenizer *****
  ' TToker walks a source string and hands out one token per NextToke() call,
  ' keeping track of the token text, its TOKE_* type and the current line number.
  Type TToker

      ' Comma-separated reserved words; initKeywords() splits this into _keywords.
      Const __keywords$="strict,superstrict,public,private,protected,byte,short,int,float,double,long,string,object,ptr,var,varptr," + ..
          "mod,continue,exit,include,import,module,moduleinfo,extern,framework,new,self,super,eachin,true,false," + ..
          "null,not,extends,abstract,final,select,case,default,const,local,global,field,method,function,type," + ..
          "and,or,shl,shr,sar,end,if,then,else,elseif,endif,while,wend,repeat,until,forever,for,to,step,goto," + ..
          "next,return,alias,rem,endrem,throw,assert,try,catch,finally,nodebug,incbin,endselect,endmethod," + ..
          "endfunction,endtype,endextern,endtry,endwhile,pi,release,defdata,readdata,restoredata,interface," + ..
          "endinterface,implements,size_t,uint,ulong,struct,endstruct,operator,where,readonly,export,override," + ..
          "enum,endenum,stackalloc,inline,fieldoffset"

      ' Keyword lookup shared by all tokers; built on first Create().
      Global _keywords:TMap

      Field _path$                  ' value returned by Path()
      Field _line:Int               ' line counter, starts at 1, bumped when a newline is consumed
      Field _lines:String[]         ' _source split on "~n"
      Field _source$                ' full text being tokenized
      Field _toke$                  ' text of the current token
      Field _tokeLower$             ' lower-cased text of the most recent identifier/keyword token
      Field _tokeType:Int           ' TOKE_* code of the current token
      Field _tokePos:Int            ' index in _source of the next unread character
      Field _lookingForEndRem:Int   ' True while ParseRemStmt() is skipping a Rem block
      Field _preprocess:Int         ' when True, ".." is never folded into a line continuation
      Field _lineOffset:Int         ' added to _line by Line()

      ' One-entry cache shared by TCHR() and TSTR(), keyed by absolute index into _source.
      Field _lastIndex:Int = -1
      Field _lastTCHR:Int
      Field _lastTSTR:String

      ' Initialises the toker for the given source text and returns Self.
      '   path       - stored and returned by Path()
      '   source     - text to tokenize
      '   preprocess - stored in _preprocess (disables ".." continuation folding)
      '   lineOffset - added to the line number reported by Line()
      Method Create:TToker( path$,source$, preprocess:Int = False, lineOffset:Int = 0 )
          _path=path
          _line=1
          _source=source
          _toke=""
          _tokeType=TOKE_EOF
          _tokePos=0
          _lines = source.split("~n")
          _preprocess = preprocess
          _lineOffset = lineOffset
          ' the peek cache is keyed by index only, so it must not survive a change of source
          _lastIndex = -1
          If Not _keywords Then
              initKeywords()
          End If
          Return Self
      End Method

      ' Builds the shared keyword map from __keywords (keys only, values are "").
      Method initKeywords()
          _keywords = New TMap
          For Local k:String = EachIn __keywords.Split(",")
              _keywords.Insert(k, "")
          Next
      End Method

      ' Moves the read position to pos. When toketype is >= 0 it also replaces the
      ' current token type. _toke and _line are left as they are.
      Method rollback(pos:Int, toketype:Int = -1)
          _tokePos = pos
          If toketype >= 0
              _tokeType = toketype
          End If
      End Method

      ' Copies the complete tokenizer state of toker into this object and returns Self.
      Method Copy:TToker( toker:TToker )
          _path=toker._path
          _line=toker._line
          _source=toker._source
          _toke=toker._toke
          _tokeLower=toker._tokeLower
          _tokeType=toker._tokeType
          _tokePos=toker._tokePos
          _lines=toker._lines
          _lookingForEndRem=toker._lookingForEndRem
          _preprocess=toker._preprocess
          _lineOffset=toker._lineOffset
          ' _source may have changed, so any cached peek from the old source is invalid
          _lastIndex = -1
          Return Self
      End Method

      ' Returns the path given to Create().
      Method Path$()
          Return _path
      End Method

      ' Returns the current line number including the configured line offset.
      Method Line:Int()
          Return _line + _lineOffset
      End Method

      ' Reads the next token from _source, stores its text in _toke and its type in
      ' _tokeType, and returns the text. At the end of the source the token is "" with
      ' type TOKE_EOF.
      Method NextToke$()
          ' ">=" rather than "=": the ".." branch below and rollback() can both leave
          ' _tokePos past the end of the source.
          If _tokePos>=_source.Length
              _toke=""
              _tokeType=TOKE_EOF
              Return _toke
          EndIf

          Local char:Int=_source[_tokePos]
          Local str$=Chr( char )
          Local start:Int=_tokePos
          _tokePos:+1
          _toke=""

          If str="~n"
              ' a newline is reported as a symbol token
              _line:+1
              _tokeType=TOKE_SYMBOL
          Else If IsSpace( char )
              ' swallow following whitespace, but stop before a newline
              While _tokePos<_source.Length And IsSpace( TCHR() ) And TSTR()<>"~n"
                  _tokePos:+1
              Wend
              _tokeType=TOKE_SPACE
          Else If str="_" Or IsAlpha( char )
              ' identifier or keyword: [_A-Za-z][_A-Za-z0-9]*
              _tokeType=TOKE_IDENT
              While _tokePos<_source.Length
                  Local c:Int=_source[_tokePos]
                  If c<>Asc("_") And Not IsAlpha( c ) And Not IsDigit( c ) Exit
                  _tokePos:+1
              Wend
              _toke=_source[start.._tokePos]
              _tokeLower = _toke.ToLower()
              If _keywords.Contains( _tokeLower )
                  _tokeType=TOKE_KEYWORD
                  ' a "rem" keyword starts a block comment, unless we are already inside one
                  If Not _lookingForEndRem And _tokeLower = "rem" Then
                      _lookingForEndRem = True
                      ParseRemStmt()
                  End If
              EndIf
          Else If IsDigit( char ) Or (str="." And IsDigit( TCHR() ))
              ' decimal number: integer part, optional fraction, optional exponent
              _tokeType=TOKE_INTLIT
              If str="." _tokeType=TOKE_FLOATLIT
              While IsDigit( TCHR() )
                  _tokePos:+1
              Wend
              If _tokeType=TOKE_INTLIT And TSTR()="." And IsDigit( TCHR(1) )
                  _tokeType=TOKE_FLOATLIT
                  _tokePos:+2
                  While IsDigit( TCHR() )
                      _tokePos:+1
                  Wend
              EndIf
              Local _tstr:String = TSTR()
              If _tstr="e" Or _tstr="E" Then
                  _tokeType=TOKE_FLOATLIT
                  _tokePos:+1
                  _tstr = TSTR()
                  If _tstr="+" Or _tstr="-" _tokePos:+1
                  While IsDigit( TCHR() )
                      _tokePos:+1
                  Wend
              EndIf
              ' integer literals longer than 10 characters are flagged as Long
              If _tokeType=TOKE_INTLIT And _tokePos-start > 10 ' BaH Long
                  _tokeType=TOKE_LONGLIT
              EndIf
          Else If str="%" And IsBinDigit( TCHR() )
              ' %binary literal
              _tokeType=TOKE_INTLIT
              _tokePos:+1
              While IsBinDigit( TCHR() )
                  _tokePos:+1
              Wend
          Else If str="$" And IsHexDigit( TCHR() )
              ' $hex literal
              _tokeType=TOKE_INTLIT
              _tokePos:+1
              While IsHexDigit( TCHR() )
                  _tokePos:+1
              Wend
          Else If str="~q"
              ' string literal; three quotes in a row open a multi-line string
              Local isMulti:Int
              _tokeType=TOKE_STRINGLIT
              Local _tstr:String = TSTR()
              If _tstr = "~q" Then
                  _tokePos:+1
                  Local _tstr2:String = TSTR()
                  If _tstr2 = "~q" Then
                      isMulti = True
                  Else
                      ' only two quotes: step back so this reads as an empty string
                      _tokePos:-1
                  End If
              End If
              If Not isMulti Then
                  While _tstr And _tstr<>"~q"
                      ' Strings can't cross line boundaries
                      If _tstr="~n" Then
                          ' undone by the increment after the loop, leaving the newline unread
                          _tokePos:-1
                          Exit
                      End If
                      _tokePos:+1
                      _tstr = TSTR()
                  Wend
              Else
                  Local lineCount:Int
                  Local count:Int
                  _tokeType = TOKE_STRINGMULTI
                  ' scan until three consecutive quotes, counting newlines on the way
                  While _tstr
                      _tokePos:+1
                      _tstr = TSTR()
                      If _tstr = "~n" Then
                          lineCount:+1
                      End If
                      If _tstr = "~q" Then
                          count :+ 1
                          If count = 3 Then
                              ' NOTE(review): advances _line by one less than the newlines seen - confirm intended
                              _line :+ lineCount - 1
                              Exit
                          End If
                      Else
                          count = 0
                      End If
                  Wend
              End If
              ' step over the closing character; running into the end of the source marks the literal as TOKE_STRINGLITEX
              If _tokePos<_source.Length _tokePos:+1 Else _tokeType=TOKE_STRINGLITEX
          Else If str="'"
              Local _tstr:String = TSTR()
              If _tstr="!" Then
                  ' '! line
                  _tokeType=TOKE_NATIVE
                  While _tstr
                      If _tstr="~n" Then
                          ' NOTE(review): steps back one character, so the character just
                          ' before the newline is not part of this token - confirm intended
                          _tokePos:-1
                          Exit
                      End If
                      _tokePos:+1
                      _tstr = TSTR()
                  Wend
              Else
                  _tokeType=TOKE_LINECOMMENT
                  SkipToEOL()
                  ' completely ignore line comments
                  If TSTR()="~n" Then
                      ' report the terminating newline instead of the comment text
                      start = _tokePos
                      If _tokePos<_source.Length
                          _tokePos:+1
                      End If
                      _line:+1
                      _tokeType=TOKE_SYMBOL
                  End If
              End If
          Else If str="." And TSTR()="." Then
              ' "..": a line continuation if only whitespace follows up to the end of the line
              Local pos:Int = _tokePos
              Local isValidTilEOL:Int = True
              _tokePos:+1
              Local _tstr:String = TSTR()
              While _tstr And _tstr<>"~n"
                  If Not IsSpace(TCHR()) Then
                      isValidTilEOL = False
                  End If
                  _tokePos:+1
                  _tstr = TSTR()
              Wend
              If Not isValidTilEOL Or _preprocess Then
                  ' plain ".." symbol: resume right after the second dot
                  _tokePos = pos + 1
                  _tokeType=TOKE_SYMBOL
              Else
                  ' fold the continuation and the line break into a single space token
                  start = _tokePos
                  _toke = " "
                  _tokePos:+1
                  _line:+1
                  _tokeType=TOKE_SPACE
              End If
          Else
              ' anything else: try the multi-character symbols, else a one-character symbol
              _tokeType=TOKE_SYMBOL
              For Local i:Int = 0 Until _symbols.length
                  Local sym$=_symbols[i]
                  If char<>sym[0] Continue
                  ' do not try to read beyond source length: the whole symbol must fit
                  If start+sym.length > _source.Length Then Continue
                  If _source[_tokePos-1.._tokePos+sym.length-1].ToLower()=sym
                      ' if symbol has alpha, test for trailing alphanumeric char - in which case this is not a symbol
                      If IsAlpha(sym[sym.length-1]) Then
                          ' not at the end of the file?
                          If _source.Length >= _tokePos+sym.length Then
                              If IsAlpha(TCHR(sym.length-1)) Or IsDigit(TCHR(sym.length-1)) Then
                                  Exit
                              End If
                          End If
                      End If
                      _tokePos:+sym.length-1
                      Exit
                  EndIf
              Next
          EndIf

          ' branches that did not set the text themselves take the consumed span
          If Not _toke _toke=_source[start.._tokePos]
          Return _toke
      End Method

      ' Returns the text of the current token.
      Method Toke$()
          Return _toke
      End Method

      ' Returns the TOKE_* type of the current token.
      Method TokeType:Int()
          Return _tokeType
      End Method

      ' Advances while the current token is whitespace; returns how many tokens were skipped.
      Method SkipSpace:Int()
          Local count:Int
          While _tokeType=TOKE_SPACE
              NextToke()
              count :+ 1
          Wend
          Return count
      End Method

      ' Skips the body of a Rem block. Called from NextToke() right after a "rem"
      ' keyword; stops at a line whose trimmed, lower-cased text starts with
      ' "endrem" or "end rem" (or when CParse matches "end"), then moves past that
      ' line and clears _lookingForEndRem.
      Method ParseRemStmt()
          NextToke()
          While _toke
              SkipEols()
              Local line:String = _lines[_line - 1].Trim().toLower()
              If line.startswith("endrem") Then
                  Exit
              End If
              If CParse( "end" )
                  CParse( "rem" )
                  Exit
              End If
              If line.startswith("end rem") Then
                  Exit
              End If
              SkipToEOL()
              NextToke()
          Wend
          SkipToEOL()
          NextToke()
          _lookingForEndRem = False
      End Method

      ' If toke equals _tokeLower, advances to the next token and returns True;
      ' otherwise returns False without consuming anything.
      ' NOTE(review): _tokeLower is only refreshed by NextToke() for identifier and
      ' keyword tokens, so the comparison uses the last identifier seen - confirm intended.
      Method CParse:Int( toke$ )
          If _tokeLower<>toke
              Return False
          EndIf
          NextToke()
          Return True
      End Method

      ' Consumes consecutive newline and ";" tokens, as far as CParse() matches them.
      Method SkipEols()
          While CParse( "~n" ) Or CParse(";")
          Wend
      End Method

      ' Moves the read position forward to the next newline (not consumed) or the end of the source.
      Method SkipToEOL()
          While TSTR() And TSTR()<>"~n"
              _tokePos:+1
          Wend
      End Method

      'Private

      ' Returns the character code at _tokePos+i, or 0 when that index lies outside
      ' the source. The result, together with its one-character string form, is
      ' cached for the most recently requested index.
      Method TCHR:Int( i:Int=0 )
          Local index:Int = _tokePos+i
          If _lastIndex <> index Then
              _lastIndex = index
              If index >= 0 And index < _source.Length Then
                  _lastTCHR = _source[index]
                  _lastTSTR = Chr( _lastTCHR )
              Else
                  _lastTCHR = 0
                  _lastTSTR = ""
              End If
          End If
          Return _lastTCHR
      End Method

      ' Returns the character at _tokePos+i as a one-character string, or "" when
      ' that index lies outside the source. Shares its cache with TCHR().
      Method TSTR$( i:Int=0 )
          TCHR( i )
          Return _lastTSTR
      End Method

      ' Concatenates the entries of _lines from index startLine-1 up to and including
      ' index endLine, appending s after each entry. Indices outside _lines are skipped.
      ' NOTE(review): the inclusive upper index endLine reaches one entry past the
      ' 1-based line endLine - confirm this is what callers expect.
      Method Join:String(startLine:Int, endLine:Int, s:String)
          Local sb:TStringBuffer = New TStringBuffer
          Local first:Int = startLine - 1
          If first < 0 Then first = 0
          Local last:Int = endLine
          If last >= _lines.length Then last = _lines.length - 1
          For Local i:Int = first To last
              sb.Append(_lines[i])
              sb.Append(s)
          Next
          Return sb.ToString()
      End Method

  End Type