regex.bmx 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998
  1. ' Copyright (c) 2007-2024 Bruce A Henderson
  2. ' All rights reserved.
  3. '
  4. ' Redistribution and use in source and binary forms, with or without
  5. ' modification, are permitted provided that the following conditions are met:
  6. ' * Redistributions of source code must retain the above copyright
  7. ' notice, this list of conditions and the following disclaimer.
  8. ' * Redistributions in binary form must reproduce the above copyright
  9. ' notice, this list of conditions and the following disclaimer in the
  10. ' documentation and/or other materials provided with the distribution.
  11. ' * Neither the name of Bruce A Henderson nor the
  12. ' names of its contributors may be used to endorse or promote products
  13. ' derived from this software without specific prior written permission.
  14. '
  15. ' THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
  16. ' EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. ' WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. ' DISCLAIMED. IN NO EVENT SHALL Bruce A Henderson BE LIABLE FOR ANY
  19. ' DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. ' (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. ' LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. ' ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. ' (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. ' SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. '
  26. SuperStrict
  27. Rem
  28. bbdoc: Regular Expressions
  29. End Rem
  30. Module Text.RegEx
  31. ModuleInfo "Version: 1.12"
  32. ModuleInfo "Author: PCRE - Philip Hazel"
  33. ModuleInfo "License: BSD"
  34. ModuleInfo "Copyright: PCRE - 1997-2021 University of Cambridge"
  35. ModuleInfo "Copyright: Wrapper - 2007-2024 Bruce A Henderson"
  36. ModuleInfo "History: 1.12"
  37. ModuleInfo "History: Updated to PCRE 10.43"
  38. ModuleInfo "History: Options are now configured per search. Default options are used if none provided."
  39. ModuleInfo "History: Added SetDefaultOptions method."
  40. ModuleInfo "History: Options can be ignored in preference for pattern provided options."
  41. ModuleInfo "History: 1.11"
  42. ModuleInfo "History: Updated to PCRE 10.39"
  43. ModuleInfo "History: 1.10"
  44. ModuleInfo "History: Updated to PCRE 10.31"
  45. ModuleInfo "History: 1.09"
  46. ModuleInfo "History: Fixed issue using wrong ovector offset."
  47. ModuleInfo "History: 1.08"
  48. ModuleInfo "History: Updated to PCRE 10.30"
  49. ModuleInfo "History: 1.07"
  50. ModuleInfo "History: Updated for 64-bit."
  51. ModuleInfo "History: 1.06"
  52. ModuleInfo "History: Updated to PCRE 10.00"
  53. ModuleInfo "History: Changed TRegExMatch to get data as required, rather than cache it."
  54. ModuleInfo "History: Added support for JIT compilation via new options."
  55. ModuleInfo "History: Added ByName methods for sub expression retrieval."
  56. ModuleInfo "History: 1.05"
  57. ModuleInfo "History: Updated to PCRE 8.34"
  58. ModuleInfo "History: Changed to use pcre16 functions, which is BlitzMax's native character size. (no more utf8 conversions)"
  59. ModuleInfo "History: Don't include the zero-termination character in sub expressions."
  60. ModuleInfo "History: 1.04"
  61. ModuleInfo "History: Updated to PCRE 8.0"
  62. ModuleInfo "History: Fixed offset problems when working with non-ascii text."
  63. ModuleInfo "History: Fixed Replace() where loop was overflowing."
  64. ModuleInfo "History: Added test_08 for utf-8."
  65. ModuleInfo "History: 1.03"
  66. ModuleInfo "History: Updated to PCRE 7.4"
  67. ModuleInfo "History: 1.02"
  68. ModuleInfo "History: Added grable's subquery/replace tweak."
  69. ModuleInfo "History: 1.01"
  70. ModuleInfo "History: Options now global."
  71. ModuleInfo "History: 1.00"
  72. ModuleInfo "History: Initial Release. (PCRE 7.0)"
  73. ModuleInfo "CC_OPTS: -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=16"
  74. ?macos
  75. ModuleInfo "CC_VOPT: osversion|-mmacosx-version-min=11.00"
  76. ?
  77. Import "common.bmx"
  78. Rem
  79. bbdoc: Performs #Find and #Replace / #ReplaceAll on strings using Perl Compatible Regular Expressions.
  80. End Rem
  81. Type TRegEx
  ' Options used by every TRegEx that is created without explicit options (see New and SetDefaultOptions).
  Global defaultOptions:TRegExOptions
  ' The options used by this instance when compiling and matching.
  Field options:TRegExOptions
  ' The pattern to search for.
  Field searchPattern:String
  ' The replacement pattern string.
  'Field replacePattern:String
  ' The pattern that init() last compiled.
  Field lastPattern:String
  ' The most recent target string; new targets are compared against it so the native copy is only rebuilt on change.
  Field lastTarget:String
  ' pointer to the target string (as a WString), created with toWString() and released with MemFree()
  Field TArg:Byte Ptr
  ' the length of the target string
  Field targLength:Size_T
  ' position at which Find() resumes when it is called without a target; set by DoFind()
  Field lastEndPos:Size_T
  ' pointer to the compiled expression
  Field pcre:Byte Ptr
  ' pointer to the offsets vector, owned by pcre2
  Field offsets:Size_T Ptr
  ' value reported by pcre2_get_ovector_count_16() for matchPtr
  Field sizeOffsets:Int
  ' PCRE2 match data block, created in init() and reused for every match
  Field matchPtr:Byte Ptr
  ' True once init() has compiled searchPattern
  Field compiled:Int = False
  ' Finalizer: releases the native resources owned by this object
  ' (the WString copy of the target, the match data block and the compiled pattern).
  Method Delete()
  	If TArg Then MemFree(TArg)

  	If matchPtr Then pcre2_match_data_free_16(matchPtr)

  	' NOTE(review): pcre comes from pcre2_compile_16(); PCRE2 documents pcre2_code_free() as the
  	' way to release compiled code - confirm that MemFree() is adequate here.
  	If pcre Then MemFree(pcre)
  End Method
  Rem
  bbdoc: Creates a new #TRegEx object.
  about: @searchPattern is the regular expression with which to perform the search.
  When @options is Null the shared default options are used (they are created on first use).
  End Rem
  Method New(searchPattern:String, options:TRegExOptions = Null)
  	Self.searchPattern = searchPattern

  	If Not options Then
  		' fall back to the shared defaults, creating them lazily
  		If Not defaultOptions Then defaultOptions = New TRegExOptions
  		options = defaultOptions
  	End If

  	Self.options = options
  End Method
  Rem
  bbdoc: Creates a new #TRegEx object.
  about: @searchPattern is the regular expression with which to perform the search.
  This is a convenience wrapper around New.
  End Rem
  Function Create:TRegEx(searchPattern:String, options:TRegExOptions = Null)
  	Local regex:TRegEx = New TRegEx(searchPattern, options)
  	Return regex
  End Function
  Rem
  bbdoc: Sets the default options for all new #TRegEx objects.
  about: This is useful if you want to set the options once and use them for all searches.
  Only objects created afterwards without explicit options pick up the new defaults.
  End Rem
  Function SetDefaultOptions(options:TRegExOptions)
  	TRegEx.defaultOptions = options
  End Function
  Rem
  bbdoc: Replaces all occurrences of the search Pattern with @replaceWith on @target, from @startPos.
  returns: The newly replaced string.
  about: Doesn't affect the original @target contents.
  End Rem
  Method ReplaceAll:String(target:String, replaceWith:String, startPos:Size_T = 0)
  	If Not options Then
  		options = New TRegExOptions
  	End If

  	' remember the current setting
  	Local oldOpt:Int = options.replaceAllMatches

  	' enable global replace
  	options.replaceAllMatches = True

  	Local s:String
  	Try
  		' let Replace do all the work
  		s = Replace(target, replaceWith, startPos)
  	Catch e:Object
  		' the options object may be the shared default instance, so the flag must be
  		' put back even when Replace fails (e.g. the pattern does not compile)
  		options.replaceAllMatches = oldOpt
  		Throw e
  	End Try

  	' put back the "current" setting
  	options.replaceAllMatches = oldOpt

  	Return s
  End Method
  Rem
  bbdoc: Replaces the first occurrence of the search Pattern with @replaceWith on @target, from @startPos.
  returns: The newly replaced string.
  about: To access a specific subquery during the replace, you can use the \n syntax in @replaceWith, where \0
  refers to the whole match, and \1 refers to the first subquery/group, and so on. <i>(see
  <a href="../tests/test_07.bmx">test_07</a> for an example)</i>.
  <p>Every match is replaced when the replaceAllMatches option is set.</p>
  <p>The returned string starts at @startPos; text before @startPos is not copied into the result.</p>
  <p>Doesn't affect the original @target contents.</p>
  End Rem
  Method Replace:String(target:String, replaceWith:String, startPos:Size_T = 0)
  	If Not options Then
  		options = New TRegExOptions
  	End If

  	Local globalReplace:Int = options.replaceAllMatches

  	' compile the pattern on first use
  	If Not compiled Then
  		init()
  	End If

  	' this is a new target...
  	If target <> lastTarget Then
  		lastTarget = target

  		If TArg Then
  			MemFree(TArg)
  		End If

  		TArg = target.toWString()
  		targLength = target.length
  	End If

  	' the execution options do not change while we loop
  	Local execOpt:Int = getExecOpt()
  	Local retString:String

  	' initial search...
  	Local result:Int = pcre2_match_16(pcre, TArg, targLength, startPos, execOpt, matchPtr, Null)

  	' the offsets vector belongs to matchPtr, so it only needs to be looked up once
  	sizeOffsets = pcre2_get_ovector_count_16(matchPtr)
  	offsets = pcre2_get_ovector_pointer_16(matchPtr)

  	While result > 0
  		Local matchStart:Size_T = offsets[0]
  		Local matchEnd:Size_T = offsets[1]

  		' add text so far, and the replacement
  		retString :+ lastTarget[startPos..matchStart] + expandReplacement(replaceWith, offsets, result)

  		' set start to the end of the last match position
  		startPos = matchEnd

  		' only doing the first replace? Then we are done.
  		If Not globalReplace Then
  			Exit
  		End If

  		If matchEnd = matchStart Then
  			' An empty match does not consume anything, so searching again from the same
  			' position would find it again forever. Copy one character across and step over it.
  			If startPos >= targLength Then
  				Exit
  			End If

  			Local stepLen:Size_T = 1
  			Local unit:Int = lastTarget[Int(startPos)]
  			' keep a UTF-16 surrogate pair together so the next search starts on a character boundary
  			If unit >= $D800 And unit <= $DBFF And startPos + stepLen < targLength Then
  				Local nextUnit:Int = lastTarget[Int(startPos) + 1]
  				If nextUnit >= $DC00 And nextUnit <= $DFFF Then
  					stepLen = 2
  				End If
  			End If

  			retString :+ lastTarget[startPos..startPos + stepLen]
  			startPos :+ stepLen
  		End If

  		' find the next match
  		result = pcre2_match_16(pcre, TArg, targLength, startPos, execOpt, matchPtr, Null)
  	Wend

  	' Finished normally (nothing more to replace): fill to the end.
  	' NOTE(review): any other negative result is a PCRE2 error; as before, the text gathered so far
  	' is returned without the remainder. DoFind() throws in that situation - consider doing the same.
  	If result >= 0 Or result = PCRE2_ERROR_NOMATCH Then
  		retString :+ lastTarget[startPos..targLength]
  	End If

  	Return retString
  End Method

  ' Expands the \n group references of @replaceWith in a single left-to-right pass.
  ' @ofs is the offsets vector of the current match and @groupCount the value returned by pcre2_match_16().
  ' The longest digit run that names an available group is used, so \10 means group 10 when it exists
  ' and group 1 followed by "0" otherwise. References to unavailable groups are kept as literal text,
  ' and text copied from the target is never scanned for references itself.
  Method expandReplacement:String(replaceWith:String, ofs:Size_T Ptr, groupCount:Int)
  	Local expanded:String
  	Local patLen:Int = replaceWith.length
  	Local litStart:Int = 0
  	Local pos:Int = 0

  	While pos < patLen
  		' 92 is the character code of the backslash
  		If replaceWith[pos] = 92 And pos + 1 < patLen Then
  			Local digitPos:Int = pos + 1
  			Local group:Int = -1

  			While digitPos < patLen
  				Local digit:Int = replaceWith[digitPos] - 48
  				If digit < 0 Or digit > 9 Then Exit

  				Local candidate:Int = digit
  				If group >= 0 Then
  					' \0 is always the whole match; digits after it are literal text
  					If group = 0 Then Exit
  					candidate = group * 10 + digit
  				End If

  				If candidate >= groupCount Then Exit

  				group = candidate
  				digitPos :+ 1
  			Wend

  			If group >= 0 Then
  				expanded :+ replaceWith[litStart..pos] + lastTarget[ofs[group * 2]..ofs[group * 2 + 1]]
  				pos = digitPos
  				litStart = pos
  				Continue
  			End If
  		End If

  		pos :+ 1
  	Wend

  	Return expanded + replaceWith[litStart..patLen]
  End Method
  Rem
  bbdoc: Performs a search on the given @target, using the search Pattern.
  returns: A #TRegExMatch object or Null if no matches found.
  about: When @target is supplied the search starts at the beginning of it. If @target is <b>not</b> set,
  the search will use the <b>previous</b> @target and continue from where the last search finished, so calling
  this method repeatedly with no parameters iterates through the target string.<br>
  You will want to set @target the first time this method is called; Null is returned when there is no
  previous target to fall back on.
  End Rem
  Method Find:TRegExMatch(target:String = Null)
  	If Not options Then options = New TRegExOptions

  	' compile the pattern on first use
  	If Not compiled Then init()

  	Local startPos:Size_T

  	If target Then
  		' only rebuild the native copy when the target really changed
  		If target <> lastTarget Then
  			If TArg Then MemFree(TArg)

  			lastTarget = target
  			TArg = target.toWString()
  			targLength = target.length
  		End If
  	Else
  		' continuing on the previous target - which must exist
  		If Not lastTarget Then Return Null

  		startPos = lastEndPos
  	End If

  	Return DoFind(startPos)
  End Method
  ' Runs the compiled pattern against the current target from @startPos.
  ' Returns a TRegExMatch on success and Null when nothing matches (or when @startPos lies beyond the
  ' end of the target). Throws a TRegExException for any other PCRE2 error.
  ' On success lastEndPos is set to the position from which a parameterless Find() continues.
  Method DoFind:TRegExMatch(startPos:Size_T)
  	' nothing can match beyond the end of the target (this is also how an iteration that
  	' finished on an empty match at the very end of the target terminates)
  	If startPos > targLength Then
  		Return Null
  	End If

  	Local result:Int = pcre2_match_16(pcre, TArg, targLength, startPos, getExecOpt(), matchPtr, Null)

  	If result >= 0 Then
  		sizeOffsets = pcre2_get_ovector_count_16(matchPtr)
  		offsets = pcre2_get_ovector_pointer_16(matchPtr)

  		Local match:TRegExMatch = New TRegExMatch(pcre, matchPtr)

  		lastEndPos = offsets[1]

  		If offsets[1] = offsets[0] Then
  			' An empty match consumes nothing, so resuming at its end would return the very same
  			' match again. Step over one character instead.
  			Local stepLen:Size_T = 1

  			If lastEndPos < targLength Then
  				Local unit:Int = lastTarget[Int(lastEndPos)]
  				' keep a UTF-16 surrogate pair together so the next search starts on a character boundary
  				If unit >= $D800 And unit <= $DBFF And lastEndPos + stepLen < targLength Then
  					Local nextUnit:Int = lastTarget[Int(lastEndPos) + 1]
  					If nextUnit >= $DC00 And nextUnit <= $DFFF Then
  						stepLen = 2
  					End If
  				End If
  			End If

  			lastEndPos :+ stepLen
  		End If

  		Return match
  	End If

  	' no point raising an exception when nothing found... we can just return a null object
  	If result = PCRE2_ERROR_NOMATCH Then
  		Return Null
  	End If

  	' there was an error of some kind... throw it!
  	Throw TRegExException.Raise(result)
  End Method
  Rem
  bbdoc: Performs a search on the given @target from @startPos, using the search Pattern.
  returns: A #TRegExMatch object or Null if no matches found.
  about: If @target is not set, the search is performed on the previous target, starting at @startPos.
  Null is returned when there is no previous target to fall back on.
  End Rem
  Method Find:TRegExMatch(target:String, startPos:Size_T)
  	If Not options Then
  		options = New TRegExOptions
  	End If

  	' compile the pattern on first use
  	If Not compiled Then
  		init()
  	End If

  	' startPos is an unsigned Size_T, so it can never be negative: it is always used exactly as
  	' given (the former "startPos < 0" fallbacks could not be reached).
  	If Not target Then
  		' no target specified: search the previous target again. No lastTarget? Not allowed.
  		If Not lastTarget Then
  			Return Null
  		End If
  	ElseIf target <> lastTarget Then
  		' this is a new target...
  		lastTarget = target

  		If TArg Then
  			MemFree(TArg)
  		End If

  		TArg = target.toWString()
  		targLength = target.length
  	End If

  	Return DoFind(startPos)
  End Method
  ' Compiles searchPattern and (re)creates the match data block.
  ' Throws a TRegExException with code -99 and the PCRE2 error text when the pattern does not compile;
  ' in that case the previously compiled state is left untouched.
  Method init()
  	lastPattern = searchPattern

  	Local errorcode:Int
  	Local erroffset:Size_T

  	' the pattern is handed to PCRE2 as a temporary UTF-16 copy
  	Local pat:Short Ptr = lastPattern.ToWString()
  	Local bptr:Byte Ptr = pcre2_compile_16(pat, Size_T(lastPattern.length), getCompileOpt(), Varptr errorcode, ..
  		Varptr erroffset, Null)
  	MemFree(pat)

  	If Not bptr Then
  		Local buffer:Short[256]
  		pcre2_get_error_message_16(errorcode, buffer, 256)
  		Throw TRegExException.Raise(-99, String.fromWString(buffer))
  	End If

  	' swap in the new compiled pattern
  	' NOTE(review): PCRE2 documents pcre2_code_free() for releasing compiled code - confirm that
  	' MemFree() is adequate here.
  	If pcre Then MemFree(pcre)
  	pcre = bptr

  	' optional JIT compilation; the outcome is not checked
  	Local jitOptions:Int = getJITOpt()
  	If jitOptions Then
  		pcre2_jit_compile_16(pcre, jitOptions)
  	End If

  	' the match data is sized for the pattern, so it has to be rebuilt as well
  	If matchPtr Then pcre2_match_data_free_16(matchPtr)
  	matchPtr = pcre2_match_data_create_from_pattern_16(pcre, Null)

  	compiled = True
  End Method
  ' Builds the option bits that init() passes to pcre2_compile_16().
  ' PCRE2_UTF is always set; when onlyPatternOptions is enabled nothing else is added.
  ' NOTE(review): the PCRE2 documentation lists PCRE2_NOTEMPTY as a pcre2_match() option and selects the
  ' newline convention through a compile context rather than option bits - confirm against the
  ' constant values declared in common.bmx that these bits have the intended effect here.
  Method getCompileOpt:Int()
  	Local flags:Int = PCRE2_UTF

  	If Not options.onlyPatternOptions Then
  		If Not options.caseSensitive Then flags :| PCRE2_CASELESS
  		If options.dotMatchAll Then flags :| PCRE2_DOTALL
  		If Not options.greedy Then flags :| PCRE2_UNGREEDY

  		' line ending convention: 1 = CR, 2 = LF, 3 = CRLF, anything else = any
  		Local lineEnd:Int = options.lineEndType
  		If lineEnd = 1 Then
  			flags :| PCRE2_NEWLINE_CR
  		ElseIf lineEnd = 2 Then
  			flags :| PCRE2_NEWLINE_LF
  		ElseIf lineEnd = 3 Then
  			flags :| PCRE2_NEWLINE_CRLF
  		Else
  			flags :| PCRE2_NEWLINE_ANY
  		End If

  		If Not options.matchEmpty Then flags :| PCRE2_NOTEMPTY
  		If options.targetIsMultiline Then flags :| PCRE2_MULTILINE
  		If options.dollarEndOnly Then flags :| PCRE2_DOLLAR_ENDONLY
  		If options.extended Then flags :| PCRE2_EXTENDED
  	End If

  	Return flags
  End Method
  ' Builds the option bits passed to pcre2_match_16().
  ' Returns 0 when onlyPatternOptions is enabled.
  ' NOTE(review): the PCRE2 documentation selects the newline convention through a compile context
  ' rather than match option bits - confirm against the constant values declared in common.bmx
  ' that the PCRE2_NEWLINE_* bits have the intended effect here.
  Method getExecOpt:Int()
  	Local flags:Int

  	If Not options.onlyPatternOptions Then
  		' line ending convention: 1 = CR, 2 = LF, 3 = CRLF, anything else = any
  		Local lineEnd:Int = options.lineEndType
  		If lineEnd = 1 Then
  			flags :| PCRE2_NEWLINE_CR
  		ElseIf lineEnd = 2 Then
  			flags :| PCRE2_NEWLINE_LF
  		ElseIf lineEnd = 3 Then
  			flags :| PCRE2_NEWLINE_CRLF
  		Else
  			flags :| PCRE2_NEWLINE_ANY
  		End If

  		If Not options.matchEmpty Then flags :| PCRE2_NOTEMPTY

  		' the start / end of the target only count as line boundaries when the options say so
  		If Not options.stringIsLineBeginning Then flags :| PCRE2_NOTBOL
  		If Not options.stringIsLineEnding Then flags :| PCRE2_NOTEOL
  	End If

  	Return flags
  End Method
  ' Builds the option bits passed to pcre2_jit_compile_16().
  ' Returns 0 (no JIT compilation requested) when none of the jit options is set or when
  ' onlyPatternOptions is enabled.
  Method getJITOpt:Int()
  	Local opt:Int

  	If options.onlyPatternOptions Then
  		Return opt
  	End If

  	If options.jitComplete Then
  		opt :| PCRE2_JIT_COMPLETE
  	End If

  	If options.jitPartialSoft Then
  		opt :| PCRE2_JIT_PARTIAL_SOFT
  	End If

  	If options.jitPartialHard Then
  		opt :| PCRE2_JIT_PARTIAL_HARD
  	End If

  	' the collected bits were previously never returned, so the jit options had no effect
  	Return opt
  End Method
  Rem
  bbdoc: Returns which optional features are available.
  about: The answer for @what is stored in @where_ and the result of pcre2_config_16() is returned.
  PCRE2_CONFIG_UNICODE_VERSION and PCRE2_CONFIG_VERSION are not supported by this function and yield
  PCRE2_ERROR_BADOPTION.
  End Rem
  Function Config:Int(what:Int, where_:Int Var)
  	' these two queries are rejected rather than forwarded
  	If what = PCRE2_CONFIG_UNICODE_VERSION Or what = PCRE2_CONFIG_VERSION Then
  		Return PCRE2_ERROR_BADOPTION
  	End If

  	Return pcre2_config_16(what, Varptr where_)
  End Function
  446. End Type
  Rem
  bbdoc: Used to extract the matched string when doing a search with regular expressions.
  about: The data is read on demand from the match data block handed over by the #TRegEx that performed
  the search, so it should be read before that #TRegEx performs another search.
  End Rem
  Type TRegExMatch

  	Private

  	' the compiled pattern (used to look up group names)
  	Field pcre:Byte Ptr
  	' the match data block holding the offsets of the match
  	Field matchPtr:Byte Ptr
  	' number of offset pairs in the match data block
  	Field count:UInt

  	Method New(pcre:Byte Ptr, matchPtr:Byte Ptr)
  		Self.pcre = pcre
  		Self.matchPtr = matchPtr
  		Self.count = pcre2_get_ovector_count_16(matchPtr)
  	End Method

  	Public

  	Rem
  	bbdoc: Returns the number of subexpressions as a result of the search.
  	about: This is the number of offset pairs held for the match, which includes entry 0 (the whole match).
  	End Rem
  	Method SubCount:Int()
  		Return count
  	End Method

  	Rem
  	bbdoc: Returns the subexpression for @matchNumber.
  	returns: The matched string, the subexpression string, or "" if @matchNumber is out of range.
  	about: For expressions with no subpattern groups, this method can be used without
  	a parameter to return the matched string.
  	End Rem
  	Method SubExp:String(matchNumber:Int = 0)
  		Local _subExpr:String

  		If matchNumber >= 0 And matchNumber < count Then
  			Local sPtr:Short Ptr
  			Local sLen:Size_T
  			Local result:Int = pcre2_substring_get_bynumber_16(matchPtr, matchNumber, Varptr sPtr, Varptr sLen)

  			' zero means success; the buffer belongs to pcre2 and is released once copied
  			If Not result Then
  				_subExpr = String.FromShorts(sPtr, Int(sLen))
  				pcre2_substring_free_16(sPtr)
  			End If
  		End If

  		Return _subExpr
  	End Method

  	Rem
  	bbdoc: Returns the start position for subexpression @matchNumber.
  	returns: The start position, or -1 if @matchNumber is out of range.
  	about: For expressions with no subpattern groups, this method can be used without a parameter
  	to return the start position of the matched string.
  	End Rem
  	Method SubStart:Int(matchNumber:Int = 0)
  		If matchNumber >= 0 And matchNumber < count Then
  			Local offsets:Size_T Ptr = pcre2_get_ovector_pointer_16(matchPtr)
  			' the offsets vector holds (start, end) pairs: group n starts at entry 2n
  			Return Int(offsets[matchNumber * 2])
  		End If
  		Return -1
  	End Method

  	Rem
  	bbdoc: Returns the end position for subexpression @matchNumber.
  	returns: The position of the last matched character, or -1 if @matchNumber is out of range.
  	about: For expressions with no subpattern groups, this method can be used without a parameter
  	to return the end position of the matched string.
  	End Rem
  	Method SubEnd:Int(matchNumber:Int = 0)
  		If matchNumber >= 0 And matchNumber < count Then
  			Local offsets:Size_T Ptr = pcre2_get_ovector_pointer_16(matchPtr)
  			' the offsets vector holds (start, end) pairs: the end of group n is entry 2n + 1
  			Local endPos:Int = Int(offsets[matchNumber * 2 + 1])
  			' a group that took no part in the match has no valid end offset
  			If endPos < 0 Then
  				Return -1
  			End If
  			' the stored end offset is one past the last matched character
  			Return endPos - 1
  		End If
  		Return -1
  	End Method

  	Rem
  	bbdoc: Returns the subexpression for the given @name.
  	returns: The matched string, the subexpression string, or "" if @name cannot be resolved.
  	End Rem
  	Method SubExp:String(name:String)
  		Return SubExpByName(name)
  	End Method

  	Rem
  	bbdoc: Returns the subexpression for the given @name.
  	returns: The matched string, the subexpression string, or "" if @name cannot be resolved.
  	End Rem
  	Method SubExpByName:String(name:String)
  		Local _subExpr:String

  		If name Then
  			Local sPtr:Short Ptr
  			Local sLen:Size_T

  			' the name is handed to pcre2 as a temporary UTF-16 copy
  			Local n:Short Ptr = name.ToWString()
  			Local result:Int = pcre2_substring_get_byname_16(matchPtr, n, Varptr sPtr, Varptr sLen)
  			MemFree(n)

  			' zero means success; the buffer belongs to pcre2 and is released once copied
  			If Not result Then
  				_subExpr = String.FromShorts(sPtr, Int(sLen))
  				pcre2_substring_free_16(sPtr)
  			End If
  		End If

  		Return _subExpr
  	End Method

  	Rem
  	bbdoc: Returns the index of the subexpression for the given @name.
  	returns: The index, or -1 if @name cannot be resolved.
  	End Rem
  	Method SubIndex:Int(name:String)
  		Return SubIndexByName(name)
  	End Method

  	Rem
  	bbdoc: Returns the index of the subexpression for the given @name.
  	returns: The index, or -1 if @name cannot be resolved.
  	End Rem
  	Method SubIndexByName:Int(name:String)
  		If name Then
  			Local n:Short Ptr = name.ToWString()
  			Local index:Int = pcre2_substring_number_from_name_16(pcre, n)
  			MemFree(n)

  			' negative values are pcre2 error codes
  			If index >= 0 Then
  				Return index
  			End If
  		End If
  		Return -1
  	End Method

  	Rem
  	bbdoc: Returns the start position of the subexpression for the given @name.
  	returns: The start position, or -1 if @name cannot be resolved.
  	End Rem
  	Method SubStart:Int(name:String)
  		Return SubStartByName(name)
  	End Method

  	Rem
  	bbdoc: Returns the start position of the subexpression for the given @name.
  	returns: The start position, or -1 if @name cannot be resolved.
  	End Rem
  	Method SubStartByName:Int(name:String)
  		' resolve the name, then share the pair-aware lookup (which rejects -1)
  		Return SubStart(SubIndexByName(name))
  	End Method

  	Rem
  	bbdoc: Returns the end position of the subexpression for the given @name.
  	returns: The position of the last matched character, or -1 if @name cannot be resolved.
  	End Rem
  	Method SubEnd:Int(name:String)
  		Return SubEndByName(name)
  	End Method

  	Rem
  	bbdoc: Returns the end position of the subexpression for the given @name.
  	returns: The position of the last matched character, or -1 if @name cannot be resolved.
  	End Rem
  	Method SubEndByName:Int(name:String)
  		' resolve the name, then share the pair-aware lookup (which rejects -1)
  		Return SubEnd(SubIndexByName(name))
  	End Method

  End Type
  Rem
  bbdoc: Specifies options used when performing searches.
  End Rem
  Type TRegExOptions
  	Rem
  	bbdoc: Ignore other options and use only the pattern's options, like case sensitivity, etc.
  	End Rem
  	Field onlyPatternOptions:Int = False
  	Rem
  	bbdoc: Whether matches are case sensitive.
  	about: Defaults to False, which makes matching case-insensitive.
  	End Rem
  	Field caseSensitive:Int = False
  	Rem
  	bbdoc: Allow dot (period) to match new lines as well as everything else.
  	about: False indicates dot doesn't match new lines.
  	End Rem
  	Field dotMatchAll:Int = False
  	Rem
  	bbdoc: Greedy matches everything from the beginning of the first delimiter to the end of the last delimiter, and everything in between.
  	about: Set to False to compile the pattern with PCRE2_UNGREEDY.
  	End Rem
  	Field greedy:Int = True
  	Rem
  	bbdoc: Determines how new lines are interpreted.
  	about:
  	<ul>
  	<li>0 - any line ending</li>
  	<li>1 - \r</li>
  	<li>2 - \n</li>
  	<li>3 - \r\n</li>
  	</ul>
  	Any other value is treated like 0.
  	End Rem
  	Field lineEndType:Int = 0
  	Rem
  	bbdoc: Allow patterns to match empty strings.
  	End Rem
  	Field matchEmpty:Int = True
  	'Rem
  	'bbdoc: Indicates whether all occurrences of matches will be replaced.
  	'about: Only applicable during a Replace.
  	'End Rem
  	Field replaceAllMatches:Int = False
  	Rem
  	bbdoc: Count the beginning of a string as the beginning of a line.
  	End Rem
  	Field stringIsLineBeginning:Int = True
  	Rem
  	bbdoc: Count the end of a string as the end of a line.
  	End Rem
  	Field stringIsLineEnding:Int = True
  	Rem
  	bbdoc: Matches internal new lines against ^ and $.
  	about: Set to False to ignore internal new lines.
  	End Rem
  	Field targetIsMultiline:Int = True
  	Rem
  	bbdoc: Dollar ($) matches newline at end.
  	about: Set to True for dollar to only match the end of the string, otherwise
  	matches a newline before the end of the string.
  	End Rem
  	Field dollarEndOnly:Int = False
  	Rem
  	bbdoc: Ignore whitespace and # comments.
  	about: When set to True, whitespace in the pattern (other than in a character class) and
  	characters between a # outside a character class and the next newline are ignored.<br>
  	An escaping backslash can be used to include a whitespace or # character as part of the pattern.
  	End Rem
  	Field extended:Int = False
  	Rem
  	bbdoc: Compile code for full matching.
  	about: When set to True, the JIT compiler is enabled.
  	End Rem
  	Field jitComplete:Int = False
  	Rem
  	bbdoc: Compile code for soft partial matching.
  	about: When set to True, the JIT compiler is enabled.
  	End Rem
  	Field jitPartialSoft:Int = False
  	Rem
  	bbdoc: Compile code for hard partial matching.
  	about: When set to True, the JIT compiler is enabled.
  	End Rem
  	Field jitPartialHard:Int = False
  End Type
  Rem
  bbdoc: A Regular Expression exception.
  about: This can be thrown either during regular expression compilation (-99) or during a search
  (a negative PCRE2 error code).
  End Rem
  Type TRegExException

  	Rem
  	bbdoc: The type of error thrown.
  	about: -99 is a regular expression compile error. Read #message for details.<br>
  	Any other value is the PCRE2 error code reported during a search. Read #message for details.
  	End Rem
  	Field num:Int

  	Rem
  	bbdoc: The error text.
  	End Rem
  	Field message:String

  	Rem
  	bbdoc: Creates an exception for error @num.
  	about: When @message is not provided, the text is looked up from @num.
  	End Rem
  	Function Raise:TRegExException(num:Int, message:String = Null)
  		Local ex:TRegExException = New TRegExException

  		ex.num = num

  		If message Then
  			ex.message = message
  		Else
  			ex.message = ex.getFromNum(num)
  		End If

  		Return ex
  	End Function

  	Rem
  	bbdoc: Returns the exception as a String.
  	End Rem
  	Method toString:String()
  		Return "( " + num + " ) " + message
  	End Method

  	' Maps a PCRE2 error code to its symbolic name.
  	' Codes that are not known (including all non-negative values) give "Unknown error".
  	Method getFromNum:String(err:Int)
  		If err = -1 Then
  			Return "No Match"
  		End If

  		If err = -2 Then
  			Return "PCRE2_ERROR_PARTIAL"
  		End If

  		' -3 .. -23 : UTF-8 errors 1 .. 21
  		If err <= -3 And err >= -23 Then
  			Return "PCRE2_ERROR_UTF8_ERR" + (-err - 2)
  		End If

  		' -24 .. -26 : UTF-16 errors 1 .. 3
  		If err <= -24 And err >= -26 Then
  			Return "PCRE2_ERROR_UTF16_ERR" + (-err - 23)
  		End If

  		' -27 .. -28 : UTF-32 errors 1 .. 2
  		If err <= -27 And err >= -28 Then
  			Return "PCRE2_ERROR_UTF32_ERR" + (-err - 26)
  		End If

  		' remaining codes in descending order, starting at -29
  		Local names:String[] = [ ..
  			"PCRE2_ERROR_BADDATA", ..
  			"PCRE2_ERROR_BADLENGTH", ..
  			"PCRE2_ERROR_BADMAGIC", ..
  			"PCRE2_ERROR_BADMODE", ..
  			"PCRE2_ERROR_BADOFFSET", ..
  			"PCRE2_ERROR_BADOPTION", ..
  			"PCRE2_ERROR_BADREPLACEMENT", ..
  			"PCRE2_ERROR_BADUTFOFFSET", ..
  			"PCRE2_ERROR_CALLOUT", ..
  			"PCRE2_ERROR_DFA_BADRESTART", ..
  			"PCRE2_ERROR_DFA_RECURSE", ..
  			"PCRE2_ERROR_DFA_UCOND", ..
  			"PCRE2_ERROR_DFA_UFUNC", ..
  			"PCRE2_ERROR_DFA_UITEM", ..
  			"PCRE2_ERROR_DFA_WSSIZE", ..
  			"PCRE2_ERROR_INTERNAL", ..
  			"PCRE2_ERROR_JIT_BADOPTION", ..
  			"PCRE2_ERROR_JIT_STACKLIMIT", ..
  			"PCRE2_ERROR_MATCHLIMIT", ..
  			"PCRE2_ERROR_NOMEMORY", ..
  			"PCRE2_ERROR_NOSUBSTRING", ..
  			"PCRE2_ERROR_NOUNIQUESUBSTRING", ..
  			"PCRE2_ERROR_NULL", ..
  			"PCRE2_ERROR_RECURSELOOP", ..
  			"PCRE2_ERROR_RECURSIONLIMIT", ..
  			"PCRE2_ERROR_UNAVAILABLE", ..
  			"PCRE2_ERROR_UNSET", ..
  			"PCRE2_ERROR_BADOFFSETLIMIT", ..
  			"PCRE2_ERROR_BADREPESCAPE", ..
  			"PCRE2_ERROR_REPMISSINGBRACE", ..
  			"PCRE2_ERROR_BADSUBSTITUTION", ..
  			"PCRE2_ERROR_BADSUBSPATTERN", ..
  			"PCRE2_ERROR_TOOMANYREPLACE", ..
  			"PCRE2_ERROR_BADSERIALIZEDDATA", ..
  			"PCRE2_ERROR_HEAPLIMIT", ..
  			"PCRE2_ERROR_CONVERT_SYNTAX", ..
  			"PCRE2_ERROR_INTERNAL_DUPMATCH", ..
  			"PCRE2_ERROR_DFA_UINVALID_UTF", ..
  			"PCRE2_ERROR_INVALIDOFFSET"]

  		Local index:Int = -err - 29
  		If index >= 0 And index < names.length Then
  			Return names[index]
  		End If

  		Return "Unknown error"
  	End Method

  End Type