toker.bmx 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. ' Copyright (c) 2013-2018 Bruce A Henderson
  2. '
  3. ' Based on the public domain Monkey "trans" by Mark Sibly
  4. '
  5. ' This software is provided 'as-is', without any express or implied
  6. ' warranty. In no event will the authors be held liable for any damages
  7. ' arising from the use of this software.
  8. '
  9. ' Permission is granted to anyone to use this software for any purpose,
  10. ' including commercial applications, and to alter it and redistribute it
  11. ' freely, subject to the following restrictions:
  12. '
  13. ' 1. The origin of this software must not be misrepresented; you must not
  14. ' claim that you wrote the original software. If you use this software
  15. ' in a product, an acknowledgment in the product documentation would be
  16. ' appreciated but is not required.
  17. '
  18. ' 2. Altered source versions must be plainly marked as such, and must not be
  19. ' misrepresented as being the original software.
  20. '
  21. ' 3. This notice may not be removed or altered from any source
  22. ' distribution.
  23. '
  24. SuperStrict
  25. Import "config.bmx"
  26. Import "type.bmx"
  ' Token categories: the value TToker stores in _tokeType and returns from TokeType().
  Const TOKE_EOF:Int          = 0     ' end of source reached; the token text is empty
  Const TOKE_SPACE:Int        = 1     ' run of whitespace, or a ".." line continuation
  Const TOKE_IDENT:Int        = 2     ' identifier: "_"/letter followed by "_"/letters/digits
  Const TOKE_KEYWORD:Int      = 3     ' identifier whose lower-case form is a reserved word
  Const TOKE_INTLIT:Int       = 4     ' decimal, %binary or $hex integer literal
  Const TOKE_FLOATLIT:Int     = 5     ' literal with a fractional part and/or an exponent
  Const TOKE_STRINGLIT:Int    = 6     ' double-quoted string literal
  Const TOKE_STRINGLITEX:Int  = 7     ' string literal that ran into the end of the source
  Const TOKE_SYMBOL:Int       = 8     ' newline, operator or any other single character
  Const TOKE_LINECOMMENT:Int  = 9     ' ' comment that is not followed by a newline
  Const TOKE_LONGLIT:Int      = 10    ' decimal integer literal more than 10 characters long
  Const TOKE_NATIVE:Int       = 11    ' line introduced by '!
  40. '***** Tokenizer *****
  ' Lexer that walks a source string one token at a time.
  '
  ' NextToke() consumes the next token and classifies it as one of the TOKE_* constants;
  ' Toke() and TokeType() report the token most recently produced, Path() and Line() its
  ' location. Rem ... End Rem blocks are swallowed inside NextToke() and surface as the
  ' newline token that follows them.
  Type TToker

      ' Comma-separated list of reserved words (lower case). Loaded into _keywords by initKeywords().
      Const __keywords$="strict,superstrict,public,private,protected,short,int,float,double,long,string,object,ptr,var,varptr," + ..
          "mod,continue,exit,include,import,module,extern,framework,new,self,super,eachin,true,false," + ..
          "null,not,extends,abstract,select,case,default,const,local,global,field,method,function,type," + ..
          "and,or,shl,shr,sar,end,if,then,else,elseif,endif,while,wend,repeat,until,forever,for,to,step," + ..
          "next,return,alias,rem,endrem,throw,assert,try,catch,finally,nodebug,incbin,endselect,endmethod," + ..
          "endfunction,endtype,endextern,endtry,endwhile,pi,release,defdata,readdata,restoredata,interface," + ..
          "endinterface,implements,size_t,uint,ulong,struct,endstruct,operator,where,readonly"

      ' Keyword lookup shared by every toker; built on the first Create().
      Global _keywords:TMap

      Field _path$                    ' value returned by Path()
      Field _line:Int                 ' 1-based line counter, bumped for every newline consumed
      Field _lines:String[]           ' _source split on "~n"
      Field _source$                  ' text being tokenized
      Field _toke$                    ' text of the current token
      Field _tokeLower$               ' lower-cased text of the last identifier/keyword token
      Field _tokeType:Int             ' TOKE_* category of the current token
      Field _tokePos:Int              ' index in _source of the next unread character
      Field _lookingForEndRem:Int     ' True while ParseRemStmt() is skipping a Rem block
      Field _preprocess:Int           ' when True, ".." is always returned as a symbol
      Field _lineOffset:Int           ' added to _line by Line()

      ' One-entry look-ahead cache shared by TCHR() and TSTR().
      Field _lastIndex:Int = -1       ' index in _source the cache was filled for (-1 = empty)
      Field _lastTCHR:Int             ' cached character code, 0 past the end of _source
      Field _lastTSTR:String          ' cached one-character string, "" past the end of _source

      ' Initializes this toker for the given source text and returns Self.
      '   path       - stored and returned by Path()
      '   source     - text to tokenize
      '   preprocess - when True, ".." is never treated as a line continuation
      '   lineOffset - added to the line number reported by Line()
      Method Create:TToker( path$,source$, preprocess:Int = False, lineOffset:Int = 0 )
          _path=path
          _line=1
          _source=source
          _toke=""
          _tokeType=TOKE_EOF
          _tokePos=0
          _lines = source.split("~n")
          _preprocess = preprocess
          _lineOffset = lineOffset
          ' the look-ahead cache is keyed by index only, so it must not survive a change of source
          _lastIndex = -1
          If Not _keywords Then
              initKeywords()
          End If
          Return Self
      End Method

      ' Builds the shared keyword map from the __keywords list.
      Method initKeywords()
          _keywords = New TMap
          For Local k:String = EachIn __keywords.Split(",")
              _keywords.Insert(k, "")
          Next
      End Method

      ' Moves the read position back to pos and, when toketype is >= 0, also restores the
      ' token type. The token text and the line counter are left as they are.
      Method rollback(pos:Int, toketype:Int = -1)
          _tokePos = pos
          If toketype >= 0
              _tokeType = toketype
          End If
      End Method

      ' Copies the complete tokenizing state of another toker into this one and returns Self.
      Method Copy:TToker( toker:TToker )
          _path=toker._path
          _line=toker._line
          _source=toker._source
          _toke=toker._toke
          _tokeLower=toker._tokeLower
          _tokeType=toker._tokeType
          _tokePos=toker._tokePos
          _lines=toker._lines
          _lookingForEndRem=toker._lookingForEndRem
          _preprocess=toker._preprocess
          _lineOffset=toker._lineOffset
          ' anything cached here belongs to the source this toker held before the copy
          _lastIndex = -1
          Return Self
      End Method

      ' Returns the path given to Create().
      Method Path$()
          Return _path
      End Method

      ' Returns the current line number including the configured line offset.
      Method Line:Int()
          Return _line + _lineOffset
      End Method

      ' Consumes the next token, stores its text and category in _toke / _tokeType and
      ' returns the text. At the end of the source the text is "" and the type TOKE_EOF.
      Method NextToke$()

          ' >= rather than = : a position past the end (see rollback) must never index _source
          If _tokePos>=_source.Length
              _toke=""
              _tokeType=TOKE_EOF
              Return _toke
          EndIf

          Local char:Int=_source[_tokePos]
          Local str$=Chr( char )

          Local start:Int=_tokePos
          _tokePos:+1
          _toke=""

          If str="~n"
              ' a newline is its own symbol token
              _line:+1
              _tokeType=TOKE_SYMBOL
          Else If IsSpace( char )
              ' run of whitespace, stopping in front of a newline
              While _tokePos<_source.Length And IsSpace( TCHR() ) And TSTR()<>"~n"
                  _tokePos:+1
              Wend
              _tokeType=TOKE_SPACE
          Else If str="_" Or IsAlpha( char )
              ' identifier or keyword
              _tokeType=TOKE_IDENT
              While _tokePos<_source.Length
                  Local c:Int=_source[_tokePos]
                  If c<>Asc("_") And Not IsAlpha( c ) And Not IsDigit( c ) Exit
                  _tokePos:+1
              Wend
              _toke=_source[start.._tokePos]
              _tokeLower = _toke.ToLower()
              If _keywords.Contains( _tokeLower )
                  _tokeType=TOKE_KEYWORD
                  ' an opening Rem swallows everything up to the matching End Rem; the token
                  ' reported to the caller is then whatever ParseRemStmt() read last
                  If Not _lookingForEndRem And _tokeLower = "rem" Then
                      _lookingForEndRem = True
                      ParseRemStmt()
                  End If
              EndIf
          Else If IsDigit( char ) Or (str="." And IsDigit( TCHR() ))
              ' decimal literal: digits [ "." digits ] [ e|E [+|-] digits ]
              _tokeType=TOKE_INTLIT
              If str="." _tokeType=TOKE_FLOATLIT
              While IsDigit( TCHR() )
                  _tokePos:+1
              Wend
              If _tokeType=TOKE_INTLIT And TSTR()="." And IsDigit( TCHR(1) )
                  _tokeType=TOKE_FLOATLIT
                  _tokePos:+2
                  While IsDigit( TCHR() )
                      _tokePos:+1
                  Wend
              EndIf
              Local _tstr:String = TSTR()
              If _tstr="e" Or _tstr="E" Then
                  _tokeType=TOKE_FLOATLIT
                  _tokePos:+1
                  _tstr = TSTR()
                  If _tstr="+" Or _tstr="-" _tokePos:+1
                  While IsDigit( TCHR() )
                      _tokePos:+1
                  Wend
              EndIf
              ' integer literals longer than 10 characters are reported as long literals
              If _tokeType=TOKE_INTLIT And _tokePos-start > 10 ' BaH Long
                  _tokeType=TOKE_LONGLIT
              EndIf
          Else If str="%" And IsBinDigit( TCHR() )
              ' binary literal
              _tokeType=TOKE_INTLIT
              _tokePos:+1
              While IsBinDigit( TCHR() )
                  _tokePos:+1
              Wend
          Else If str="$" And IsHexDigit( TCHR() )
              ' hexadecimal literal
              _tokeType=TOKE_INTLIT
              _tokePos:+1
              While IsHexDigit( TCHR() )
                  _tokePos:+1
              Wend
          Else If str="~q"
              _tokeType=TOKE_STRINGLIT
              Local _tstr:String = TSTR()
              While _tstr And _tstr<>"~q"
                  ' Strings can't cross line boundaries
                  If _tstr="~n" Then
                      ' step back so that the +1 below leaves the position on the newline;
                      ' the token then ends without a closing quote
                      _tokePos:-1
                      Exit
                  End If
                  _tokePos:+1
                  _tstr = TSTR()
              Wend
              ' take the closing quote, or flag a string that ran into the end of the source
              If _tokePos<_source.Length _tokePos:+1 Else _tokeType=TOKE_STRINGLITEX
          Else If str="'"
              Local _tstr:String = TSTR()
              If _tstr="!" Then
                  _tokeType=TOKE_NATIVE

                  While _tstr
                      If _tstr="~n" Then
                          ' NOTE(review): unlike the string case nothing re-advances after this
                          ' step back, so the character in front of the newline is left for the
                          ' next token - confirm against the consumer of TOKE_NATIVE
                          _tokePos:-1
                          Exit
                      End If
                      _tokePos:+1
                      _tstr = TSTR()
                  Wend
              Else
                  _tokeType=TOKE_LINECOMMENT

                  SkipToEOL()

                  ' completely ignore line comments
                  If TSTR()="~n" Then
                      ' report only the newline that ends the comment
                      start = _tokePos

                      If _tokePos<_source.Length
                          _tokePos:+1
                      End If

                      _line:+1

                      _tokeType=TOKE_SYMBOL
                  End If
              End If
          Else If str="." And TSTR()="." Then
              ' ".." followed only by whitespace up to the end of the line is a line
              ' continuation and is reported as a single space
              Local pos:Int = _tokePos
              Local isValidTilEOL:Int = True
              _tokePos:+1
              Local _tstr:String = TSTR()
              While _tstr And _tstr<>"~n"
                  If Not IsSpace(TCHR()) Then
                      isValidTilEOL = False
                  End If
                  _tokePos:+1
                  _tstr = TSTR()
              Wend
              If Not isValidTilEOL Or _preprocess Then
                  ' plain ".." symbol: resume directly after the second dot
                  _tokePos = pos + 1
                  _tokeType=TOKE_SYMBOL
              Else
                  start = _tokePos
                  _toke = " "
                  ' step over and count the newline only when there is one; when the scan
                  ' stopped at the end of the source the position must stay at _source.Length
                  If _tstr = "~n" Then
                      _tokePos:+1
                      _line:+1
                  End If
                  _tokeType=TOKE_SPACE
              End If
          Else
              ' anything else is a symbol: a multi-character entry of _symbols when one
              ' matches here, otherwise the single character already consumed
              _tokeType=TOKE_SYMBOL
              For Local i:Int = 0 Until _symbols.length
                  Local sym$=_symbols[i]
                  If char<>sym[0] Continue

                  'do not try to read beyond source length
                  If TCHR(sym.length) <= 0 Then Continue

                  If _source[_tokePos-1.._tokePos+sym.length-1].ToLower()=sym

                      ' if symbol has alpha, test for trailing alphanumeric char - in which case this is not a symbol
                      If IsAlpha(sym[sym.length-1]) Then
                          ' not at the end of the file?
                          If _source.Length >= _tokePos+sym.length Then
                              If IsAlpha(TCHR(sym.length-1)) Or IsDigit(TCHR(sym.length-1)) Then
                                  Exit
                              End If
                          End If
                      End If

                      _tokePos:+sym.length-1

                      Exit
                  EndIf
              Next
          EndIf

          ' branches that did not set the text themselves get the consumed slice
          If Not _toke _toke=_source[start.._tokePos]

          Return _toke
      End Method

      ' Returns the text of the current token.
      Method Toke$()
          Return _toke
      End Method

      ' Returns the TOKE_* category of the current token.
      Method TokeType:Int()
          Return _tokeType
      End Method

      ' Advances past consecutive TOKE_SPACE tokens and returns how many were skipped.
      Method SkipSpace:Int()
          Local count:Int
          While _tokeType=TOKE_SPACE
              NextToke
              count :+ 1
          Wend
          Return count
      End Method

      ' Skips the body of a Rem block. Called by NextToke() right after a "rem" keyword has
      ' been read. Each pass inspects the text of the current source line and stops at a
      ' line that starts with "endrem" or "end rem" (case-insensitive, after trimming);
      ' the rest of that line and the token after it are consumed as well.
      Method ParseRemStmt()

          NextToke()

          While _toke
              SkipEols()

              Local lineText:String = _lines[_line - 1].Trim().toLower()
              If lineText.startswith("endrem") Then
                  Exit
              End If

              If CParse( "end" )
                  CParse "rem"
                  Exit
              End If

              If lineText.startswith("end rem") Then
                  Exit
              End If

              SkipToEOL()
              NextToke
          Wend

          SkipToEOL()
          NextToke

          _lookingForEndRem = False

      End Method

      ' Consumes the current token and returns True when _tokeLower equals toke, otherwise
      ' returns False and leaves the position untouched.
      ' NOTE(review): NextToke() assigns _tokeLower only for identifier/keyword tokens, so
      ' for every other token kind this compares against the last identifier read - confirm
      ' that is intended before relying on CParse for symbols.
      Method CParse:Int( toke$ )
          If _tokeLower<>toke
              Return False
          EndIf
          NextToke
          Return True
      End Method

      ' Consumes tokens for as long as CParse() accepts a newline or ";".
      Method SkipEols()
          While CParse( "~n" ) Or CParse(";")
          Wend
      End Method

      ' Moves the read position forward to the next newline, or to the end of the source.
      Method SkipToEOL()
          While TSTR() And TSTR()<>"~n"
              _tokePos:+1
          Wend
      End Method

      'Private

      ' Character code at offset i from the current position, or 0 past the end of the
      ' source. The result is cached together with its one-character string for TSTR().
      Method TCHR:Int( i:Int=0 )
          If _lastIndex <> _tokePos+i Then
              _lastIndex = _tokePos+i
              If _lastIndex < _source.Length Then
                  _lastTCHR = _source[_lastIndex]
                  _lastTSTR = Chr( _lastTCHR )
              Else
                  _lastTCHR = 0
                  _lastTSTR = ""
              End If
          End If
          Return _lastTCHR
      End Method

      ' One-character string at offset i from the current position, or "" past the end of
      ' the source. Shares its cache with TCHR().
      Method TSTR$( i:Int=0 )
          If _lastIndex <> _tokePos+i Then
              _lastIndex = _tokePos+i
              If _lastIndex < _source.Length Then
                  _lastTCHR = _source[_lastIndex]
                  _lastTSTR = Chr( _lastTCHR )
              Else
                  _lastTCHR = 0
                  _lastTSTR = ""
              End If
          End If
          Return _lastTSTR
      End Method

      ' Concatenates the source lines _lines[startLine-1] through _lines[endLine], both
      ' inclusive, appending s after each one. Indices outside _lines are clamped, so a
      ' range that reaches past the last line stops at the last line.
      Method Join:String(startLine:Int, endLine:Int, s:String)
          Local first:Int = startLine - 1
          Local last:Int = endLine
          If first < 0 Then first = 0
          If last > _lines.length - 1 Then last = _lines.length - 1

          Local sb:TStringBuffer = New TStringBuffer
          For Local i:Int = first To last
              sb.Append(_lines[i])
              sb.Append(s)
          Next
          Return sb.ToString()
      End Method

  End Type