' Copyright (c) 2007-2025 Bruce A Henderson
' All rights reserved.
'
' Redistribution and use in source and binary forms, with or without
' modification, are permitted provided that the following conditions are met:
'     * Redistributions of source code must retain the above copyright
'       notice, this list of conditions and the following disclaimer.
'     * Redistributions in binary form must reproduce the above copyright
'       notice, this list of conditions and the following disclaimer in the
'       documentation and/or other materials provided with the distribution.
'     * Neither the name of Bruce A Henderson nor the
'       names of its contributors may be used to endorse or promote products
'       derived from this software without specific prior written permission.
'
' THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
' EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
' WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
' DISCLAIMED. IN NO EVENT SHALL Bruce A Henderson BE LIABLE FOR ANY
' DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
' (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
' LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
' ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
' (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
' SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'
' SuperStrict: all variables and return types must be declared explicitly.
SuperStrict

Rem
bbdoc: Regular Expressions
End Rem
Module Text.RegEx

' Module metadata. The "History" entries are a changelog, newest version first.
ModuleInfo "Version: 1.13"
ModuleInfo "Author: PCRE - Philip Hazel"
ModuleInfo "License: BSD"
ModuleInfo "Copyright: PCRE - 1997-2021 University of Cambridge"
ModuleInfo "Copyright: Wrapper - 2007-2025 Bruce A Henderson"

ModuleInfo "History: 1.13"
ModuleInfo "History: Fixed free of pcre pointer."
ModuleInfo "History: 1.12"
ModuleInfo "History: Updated to PCRE 10.43"
ModuleInfo "History: Options are now configured per search. Default options are used if none provided."
ModuleInfo "History: Added SetDefaultOptions method."
ModuleInfo "History: Options can be ignored in preference for pattern provided options."
ModuleInfo "History: 1.11"
ModuleInfo "History: Updated to PCRE 10.39"
ModuleInfo "History: 1.10"
ModuleInfo "History: Updated to PCRE 10.31"
ModuleInfo "History: 1.09"
ModuleInfo "History: Fixed issue using wrong ovector offset."
ModuleInfo "History: 1.08"
ModuleInfo "History: Updated to PCRE 10.30"
ModuleInfo "History: 1.07"
ModuleInfo "History: Updated for 64-bit."
ModuleInfo "History: 1.06"
ModuleInfo "History: Updated to PCRE 10.00"
ModuleInfo "History: Changed TRegExMatch to get data as required, rather than cache it."
ModuleInfo "History: Added support for JIT compilation via new options."
ModuleInfo "History: Added ByName methods for sub expression retrieval."
ModuleInfo "History: 1.05"
ModuleInfo "History: Updated to PCRE 8.34"
ModuleInfo "History: Changed to use pcre16 functions, which is BlitzMax's native character size. (no more utf8 conversions)"
ModuleInfo "History: Don't include the zero-termination character in sub expressions."
ModuleInfo "History: 1.04"
ModuleInfo "History: Updated to PCRE 8.0"
ModuleInfo "History: Fixed offset problems when working with non-ascii text."
ModuleInfo "History: Fixed Replace() where loop was overflowing."
ModuleInfo "History: Added test_08 for utf-8."
ModuleInfo "History: 1.03"
ModuleInfo "History: Updated to PCRE 7.4"
ModuleInfo "History: 1.02"
ModuleInfo "History: Added grable's subquery/replace tweak."
ModuleInfo "History: 1.01"
ModuleInfo "History: Options now global."
ModuleInfo "History: 1.00"
ModuleInfo "History: Initial Release. (PCRE 7.0)"

' C compiler options. PCRE2_CODE_UNIT_WIDTH=16 presumably selects the 16-bit
' PCRE2 API, matching the pcre2_*_16 functions called throughout this file.
ModuleInfo "CC_OPTS: -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=16"

' Extra compiler option applied on macOS builds only.
?macos
ModuleInfo "CC_VOPT: osversion|-mmacosx-version-min=11.00"
?

' NOTE(review): the PCRE2 externs and constants used below are not declared in
' this file; presumably they come from common.bmx - confirm.
Import "common.bmx"
  80. Rem
  81. bbdoc: Performs #Find and #Replace / #ReplaceAll on strings using Perl Compatible Regular Expressions.
  82. End Rem
  83. Type TRegEx
  84. ' Configures the way in which Regular Expressions are handled.
  85. Global defaultOptions:TRegExOptions
  86. ' The options to use for this search.
  87. Field options:TRegExOptions
  88. ' The pattern to search for.
  89. Field searchPattern:String
  90. ' The replacement pattern string.
  91. 'Field replacePattern:String
  92. Field lastPattern:String
  93. Field lastTarget:String
  94. ' pointer to the target string (as a WString)
  95. Field TArg:Byte Ptr
  96. ' the length of the target string
  97. Field targLength:Size_T
  98. Field lastEndPos:Size_T
  99. ' pointer to the compiled expression
  100. Field pcre:Byte Ptr
  101. ' pointer to the offsets vector, owned by pcre2
  102. Field offsets:Size_T Ptr
  103. ' number of offsets
  104. Field sizeOffsets:Int
  105. Field matchPtr:Byte Ptr
  106. Field compiled:Int = False
  107. Method Delete()
  108. If TArg Then
  109. MemFree(TArg)
  110. TArg = Null
  111. End If
  112. If matchPtr Then
  113. pcre2_match_data_free_16(matchPtr)
  114. matchPtr = Null
  115. End If
  116. If pcre
  117. pcre2_code_free_16(pcre)
  118. pcre = Null
  119. EndIf
  120. End Method
  121. Rem
  122. bbdoc: Creates a new #TRegEx object.
  123. about: @searchPattern is the regular expression with which to perform the search.
  124. End Rem
  125. Method New(searchPattern:String, options:TRegExOptions = Null)
  126. Self.searchPattern = searchPattern
  127. If options Then
  128. Self.options = options
  129. Else
  130. If Not defaultOptions Then
  131. defaultOptions = New TRegExOptions
  132. End If
  133. Self.options = defaultOptions
  134. End If
  135. End Method
  136. Rem
  137. bbdoc: Creates a new #TRegEx object.
  138. about: @searchPattern is the regular expression with which to perform the search.
  139. End Rem
  140. Function Create:TRegEx(searchPattern:String, options:TRegExOptions = Null)
  141. Return New TRegEx(searchPattern, options)
  142. End Function
  143. Rem
  144. bbdoc: Sets the default options for all new #TRegEx objects.
  145. about: This is useful if you want to set the options once and use them for all searches.
  146. End Rem
  147. Function SetDefaultOptions(options:TRegExOptions)
  148. defaultOptions = options
  149. End Function
  150. Rem
  151. bbdoc: Replaces all occurances of the search Pattern with @replaceWith on @target, from @startPos.
  152. returns: The newly replaced string.
  153. about: Doesn't affect the original @target contents.
  154. End Rem
  155. Method ReplaceAll:String(target:String, replaceWith:String, startPos:Size_T = 0)
  156. If Not options Then
  157. options = New TRegExOptions
  158. End If
  159. ' remember the current setting
  160. Local oldOpt:Int = options.replaceAllMatches
  161. ' enable global replace
  162. options.replaceAllMatches = True
  163. ' let Replace do all the work
  164. Local s:String = Replace(target, replaceWith, startPos)
  165. ' put back the "current" setting
  166. options.replaceAllMatches = oldOpt
  167. Return s
  168. End Method
  169. Rem
  170. bbdoc: Replaces the first occurance of the search Pattern with @replaceWith on @target, from @startPos.
  171. returns: The newly replaced string.
  172. about: To access a specific subquery during the replace, you can use the \n syntax in @replaceWith, where \0
  173. refers to the whole match, and \1 refers to the first subquery/group, and so on. <i>(see
  174. <a href="../tests/test_07.bmx">test_07</a> for an example)</i>.
  175. <p>Doesn't affect the original @target contents.</p>
  176. End Rem
  177. Method Replace:String(target:String, replaceWith:String, startPos:Size_T = 0)
  178. If Not options Then
  179. options = New TRegExOptions
  180. End If
  181. Local globalReplace:Int = options.replaceAllMatches
  182. ' the search pattern has changed !
  183. ' recompile
  184. ' this only happens very occasionally... (probably ;-)
  185. 'If lastPattern <> searchPattern Or Not pcre Then
  186. If Not compiled Then
  187. init()
  188. End If
  189. Local retString:String
  190. ' this is a new target...
  191. If target <> lastTarget Then
  192. lastTarget = target
  193. If TArg Then
  194. MemFree(TArg)
  195. End If
  196. TArg = target.toWString()
  197. targLength = target.length
  198. End If
  199. ' initial search...
  200. Local result:Int = pcre2_match_16(pcre, TArg, targLength, startPos, getExecOpt(), matchPtr, Null)
  201. ' if there wasn't an error... process the match (even for no-match)
  202. While result >= 0 Or result = PCRE2_ERROR_NOMATCH
  203. sizeOffsets = pcre2_get_ovector_count_16(matchPtr)
  204. offsets = pcre2_get_ovector_pointer_16(matchPtr)
  205. Local replaceStr:String = replaceWith
  206. Local ofs:Size_T Ptr = offsets
  207. For Local i:Int = 0 Until result
  208. Local idx:Int = i * 2
  209. replaceStr = replaceStr.Replace( "\" + i, lastTarget[ofs[idx]..ofs[idx+1]])
  210. Next
  211. If result > 0 Then
  212. ' add text so far, and the replacement
  213. retString:+ lastTarget[startPos..offsets[0]] + replaceStr
  214. Else
  215. ' search finished. Fill to the end
  216. retString:+ lastTarget[startPos..targLength]
  217. Exit
  218. End If
  219. ' set start to the end of the last match position
  220. startPos = offsets[1]
  221. ' only doing the first replace? Then we can exit now...
  222. If Not globalReplace Then
  223. ' all done. Fill to the end
  224. retString:+ lastTarget[startPos..targLength]
  225. Exit
  226. End If
  227. ' find the next match
  228. result = pcre2_match_16(pcre, TArg, targLength, startPos, getExecOpt(), matchPtr, Null)
  229. Wend
  230. Return retString ' convert back to max string
  231. End Method
  232. Rem
  233. bbdoc: Performs a search on the given @target from @startPos, using the search Pattern.
  234. returns: A #TRegExMatch object or Null if no matches found.
  235. about: If @target is <b>not</b> set, the search will use the <b>previous</b> @target.
  236. You will want to set @target the first time this method is called.<br>
  237. If you call this method with no parameters it will start the search from the end of the last search, effectively
  238. iterating through the target string.
  239. End Rem
  240. Method Find:TRegExMatch(target:String = Null)
  241. If Not options Then
  242. options = New TRegExOptions
  243. End If
  244. ' the search pattern has changed !
  245. ' recompile
  246. ' this only happens very occasionally... (probably ;-)
  247. 'If lastPattern <> searchPattern Or Not pcre Then
  248. If Not compiled Then
  249. init()
  250. End If
  251. Local startPos:Size_T
  252. ' no target specified, we are probably performing another search on the original target
  253. If Not target Then
  254. ' no lastTarget? Not allowed.
  255. If Not lastTarget Then
  256. Return Null
  257. End If
  258. startPos = lastEndPos
  259. Else
  260. ' this is a new target...
  261. If target <> lastTarget Then
  262. lastTarget = target
  263. If TArg Then
  264. MemFree(TArg)
  265. End If
  266. TArg = target.toWString()
  267. targLength = target.length
  268. End If
  269. End If
  270. Return DoFind(startPos)
  271. End Method
  272. Method DoFind:TRegExMatch(startPos:Size_T)
  273. Local result:Int = pcre2_match_16(pcre, TArg, targLength, startPos, getExecOpt(), matchPtr, Null)
  274. If result >= 0 Then
  275. sizeOffsets = pcre2_get_ovector_count_16(matchPtr)
  276. offsets = pcre2_get_ovector_pointer_16(matchPtr)
  277. Local match:TRegExMatch = New TRegExMatch(pcre, matchPtr)
  278. lastEndPos = offsets[1]
  279. Return match
  280. Else
  281. ' no point raising an exception when nothing found... we can just return a null object
  282. If result = PCRE2_ERROR_NOMATCH Then
  283. Return Null
  284. End If
  285. ' there was an error of some kind... throw it!
  286. Throw TRegExException.Raise(result)
  287. End If
  288. End Method
  289. Rem
  290. bbdoc: Performs a search on the given @target from @startPos, using the search Pattern.
  291. returns: A #TRegExMatch object or Null if no matches found.
  292. End Rem
  293. Method Find:TRegExMatch(target:String, startPos:Size_T)
  294. If Not options Then
  295. options = New TRegExOptions
  296. End If
  297. ' the search pattern has changed !
  298. ' recompile
  299. ' this only happens very occasionally... (probably ;-)
  300. 'If lastPattern <> searchPattern Or Not pcre Then
  301. If Not compiled Then
  302. init()
  303. End If
  304. ' no target specified, we are probably performing another search on the original target
  305. If Not target Then
  306. ' no lastTarget? Not allowed.
  307. If Not lastTarget Then
  308. Return Null
  309. End If
  310. ' no startPos? then we'll start from the end of the last search point
  311. If startPos < 0 Then
  312. startPos = lastEndPos
  313. End If
  314. Else
  315. ' this is a new target...
  316. If target <> lastTarget Then
  317. lastTarget = target
  318. If TArg Then
  319. MemFree(TArg)
  320. End If
  321. TArg = target.toWString()
  322. targLength = target.length
  323. End If
  324. End If
  325. ' set the startPos to 0 if not already set
  326. If startPos < 0 Then
  327. startPos = 0
  328. End If
  329. Return DoFind(startPos)
  330. End Method
  331. ' resets and recompiles the regular expression
  332. Method init()
  333. lastPattern = searchPattern
  334. Local pat:Short Ptr = lastPattern.ToWString()
  335. Local errorcode:Int
  336. Local erroffset:Size_T
  337. Local bptr:Byte Ptr = pcre2_compile_16(pat, Size_T(lastPattern.length), getCompileOpt(), Varptr errorcode, ..
  338. Varptr erroffset, Null)
  339. MemFree(pat)
  340. If bptr Then
  341. If pcre
  342. pcre2_code_free_16(pcre)
  343. EndIf
  344. pcre = bptr
  345. Else
  346. Local buffer:Short[256]
  347. pcre2_get_error_message_16(errorcode, buffer, 256)
  348. Throw TRegExException.Raise(-99, String.fromWString(buffer))
  349. End If
  350. Local jitOptions:Int = getJITOpt()
  351. If jitOptions Then
  352. Local result:Int = pcre2_jit_compile_16(pcre, jitOptions)
  353. End If
  354. If matchPtr Then
  355. pcre2_match_data_free_16(matchPtr)
  356. End If
  357. matchPtr = pcre2_match_data_create_from_pattern_16(pcre, Null)
  358. compiled = True
  359. End Method
  360. ' possible options for compiling
  361. Method getCompileOpt:Int()
  362. Local opt:Int = PCRE2_UTF
  363. If options.onlyPatternOptions Then
  364. Return opt
  365. End If
  366. If Not options.caseSensitive Then
  367. opt:| PCRE2_CASELESS
  368. End If
  369. If options.dotMatchAll Then
  370. opt:| PCRE2_DOTALL
  371. End If
  372. If Not options.greedy Then
  373. opt:| PCRE2_UNGREEDY
  374. End If
  375. Select options.lineEndType
  376. Case 1
  377. opt:| PCRE2_NEWLINE_CR
  378. Case 2
  379. opt:| PCRE2_NEWLINE_LF
  380. Case 3
  381. opt:| PCRE2_NEWLINE_CRLF
  382. Default
  383. opt:| PCRE2_NEWLINE_ANY
  384. End Select
  385. If Not options.matchEmpty Then
  386. opt:| PCRE2_NOTEMPTY
  387. End If
  388. If options.targetIsMultiline Then
  389. opt:| PCRE2_MULTILINE
  390. End If
  391. If options.dollarEndOnly Then
  392. opt:| PCRE2_DOLLAR_ENDONLY
  393. End If
  394. If options.extended Then
  395. opt:| PCRE2_EXTENDED
  396. End If
  397. Return opt
  398. End Method
  399. ' possible options for execution
  400. Method getExecOpt:Int()
  401. Local opt:Int
  402. If options.onlyPatternOptions Then
  403. Return opt
  404. End If
  405. Select options.lineEndType
  406. Case 1
  407. opt:| PCRE2_NEWLINE_CR
  408. Case 2
  409. opt:| PCRE2_NEWLINE_LF
  410. Case 3
  411. opt:| PCRE2_NEWLINE_CRLF
  412. Default
  413. opt:| PCRE2_NEWLINE_ANY
  414. End Select
  415. If Not options.matchEmpty Then
  416. opt:| PCRE2_NOTEMPTY
  417. End If
  418. If Not options.stringIsLineBeginning Then
  419. opt:| PCRE2_NOTBOL
  420. End If
  421. If Not options.stringIsLineEnding Then
  422. opt:| PCRE2_NOTEOL
  423. End If
  424. Return opt
  425. End Method
  426. ' possible options for jit
  427. Method getJITOpt:Int()
  428. Local opt:Int
  429. If options.onlyPatternOptions Then
  430. Return opt
  431. End If
  432. If options.jitComplete Then
  433. opt :| PCRE2_JIT_COMPLETE
  434. End If
  435. If options.jitPartialSoft Then
  436. opt :| PCRE2_JIT_PARTIAL_SOFT
  437. End If
  438. If options.jitPartialHard Then
  439. opt :| PCRE2_JIT_PARTIAL_HARD
  440. End If
  441. End Method
  442. Rem
  443. bbdoc: Returns which optional features are available.
  444. End Rem
  445. Function Config:Int(what:Int, where_:Int Var)
  446. If what <> PCRE2_CONFIG_UNICODE_VERSION And what <> PCRE2_CONFIG_VERSION Then
  447. Return pcre2_config_16(what, Varptr where_)
  448. End If
  449. Return PCRE2_ERROR_BADOPTION
  450. End Function
  451. End Type
  452. Rem
  453. bbdoc: Used to extract the matched string when doing a search with regular expressions.
  454. End Rem
  455. Type TRegExMatch
  456. Private
  457. Field pcre:Byte Ptr
  458. Field matchPtr:Byte Ptr
  459. Field count:UInt
  460. Method New(pcre:Byte Ptr, matchPtr:Byte Ptr)
  461. Self.pcre = pcre
  462. Self.matchPtr = matchPtr
  463. Self.count = pcre2_get_ovector_count_16(matchPtr)
  464. End Method
  465. Public
  466. Rem
  467. bbdoc: Returns the number of subexpressions as a result of the search.
  468. End Rem
  469. Method SubCount:Int()
  470. Return count
  471. End Method
  472. Rem
  473. bbdoc: Returns the subexpression for @matchNumber.
  474. returns: The matched string, the subexpression string, or "" if @matchNumber is out of range.
  475. about: For expressions with no subpattern groups, this method can be used without
  476. a parameter to return the matched string.
  477. End Rem
  478. Method SubExp:String(matchNumber:Int = 0)
  479. Local _subExpr:String
  480. If matchNumber >= 0 And matchNumber < count Then
  481. Local sPtr:Short Ptr
  482. Local sLen:Size_T
  483. Local result:Int = pcre2_substring_get_bynumber_16(matchPtr, matchNumber, Varptr sPtr, Varptr sLen)
  484. If Not result Then
  485. _subExpr = String.FromShorts(sPtr, Int(sLen))
  486. pcre2_substring_free_16(sPtr)
  487. End If
  488. End If
  489. Return _subExpr
  490. End Method
  491. Rem
  492. bbdoc: Returns the start position for subexpression @matchNumber.
  493. returns: The start position, or -1 if @matchNumber is out of range.
  494. about: For expressions with no subpattern groups, this method can be used without a parameter
  495. to return the start position of the matched string.
  496. End Rem
  497. Method SubStart:Int(matchNumber:Int = 0)
  498. If matchNumber >= 0 And matchNumber < count Then
  499. Local offsets:Size_T Ptr = pcre2_get_ovector_pointer_16(matchPtr)
  500. Return offsets[matchNumber]
  501. End If
  502. Return -1
  503. End Method
  504. Rem
  505. bbdoc: Returns the end position for subexpression @matchNumber.
  506. returns: The end position, or -1 if @matchNumber is out of range.
  507. about: For expressions with no subpattern groups, this method can be used without a parameter
  508. to return the end position of the matched string.
  509. End Rem
  510. Method SubEnd:Int(matchNumber:Int = 0)
  511. If matchNumber >= 0 And matchNumber < count Then
  512. Local offsets:Size_T Ptr = pcre2_get_ovector_pointer_16(matchPtr)
  513. Return offsets[matchNumber + 1] - 1
  514. End If
  515. Return -1
  516. End Method
  517. Rem
  518. bbdoc: Returns the subexpression for the given @name.
  519. returns: The matched string, the subexpression string, or "" if @matchNumber is out of range.
  520. End Rem
  521. Method SubExp:String(name:String)
  522. Return SubExpByName(name)
  523. End Method
  524. Rem
  525. bbdoc: Returns the subexpression for the given @name.
  526. returns: The matched string, the subexpression string, or "" if @matchNumber is out of range.
  527. End Rem
  528. Method SubExpByName:String(name:String)
  529. Local _subExpr:String
  530. If name Then
  531. Local sPtr:Short Ptr
  532. Local sLen:Size_T
  533. Local n:Short Ptr = name.ToWString()
  534. Local result:Int = pcre2_substring_get_byname_16(matchPtr, n, Varptr sPtr, Varptr sLen)
  535. MemFree(n)
  536. If Not result Then
  537. _subExpr = String.FromShorts(sPtr, Int(sLen))
  538. pcre2_substring_free_16(sPtr)
  539. End If
  540. End If
  541. Return _subExpr
  542. End Method
  543. Rem
  544. bbdoc: Returns the index of the subexpression for the given @name.
  545. End Rem
  546. Method SubIndex:Int(name:String)
  547. Return SubIndexByName(name)
  548. End Method
  549. Rem
  550. bbdoc: Returns the index of the subexpression for the given @name.
  551. End Rem
  552. Method SubIndexByName:Int(name:String)
  553. If name Then
  554. Local n:Short Ptr = name.ToWString()
  555. Local index:Int = pcre2_substring_number_from_name_16(pcre, n)
  556. MemFree(n)
  557. If index >= 0 Then
  558. Return index
  559. End If
  560. End If
  561. Return -1
  562. End Method
  563. Rem
  564. bbdoc: Returns the start position of the subexpression for the given @name.
  565. End Rem
  566. Method SubStart:Int(name:String)
  567. Return SubStartByName(name)
  568. End Method
  569. Rem
  570. bbdoc: Returns the start position of the subexpression for the given @name.
  571. End Rem
  572. Method SubStartByName:Int(name:String)
  573. If name Then
  574. Local n:Short Ptr = name.ToWString()
  575. Local index:Int = pcre2_substring_number_from_name_16(pcre, n)
  576. MemFree(n)
  577. If index >= 0 Then
  578. Local offsets:Size_T Ptr = pcre2_get_ovector_pointer_16(matchPtr)
  579. Return offsets[index]
  580. End If
  581. End If
  582. Return -1
  583. End Method
  584. Rem
  585. bbdoc: Returns the end position of the subexpression for the given @name.
  586. End Rem
  587. Method SubEnd:Int(name:String)
  588. Return SubEndByName(name)
  589. End Method
  590. Rem
  591. bbdoc: Returns the end position of the subexpression for the given @name.
  592. End Rem
  593. Method SubEndByName:Int(name:String)
  594. If name Then
  595. Local n:Short Ptr = name.ToWString()
  596. Local index:Int = pcre2_substring_number_from_name_16(pcre, n)
  597. MemFree(n)
  598. If index >= 0 Then
  599. Local offsets:Size_T Ptr = pcre2_get_ovector_pointer_16(matchPtr)
  600. Return offsets[index + 1] - 1
  601. End If
  602. End If
  603. Return -1
  604. End Method
  605. End Type
Rem
bbdoc: Specifies options used when performing searches.
End Rem
Type TRegExOptions

	Rem
	bbdoc: Ignore other options and use only the pattern's options, like case sensitivity, etc.
	End Rem
	Field onlyPatternOptions:Int = False

	Rem
	bbdoc: Whether matches are case sensitive.
	about: Defaults to False, i.e. matching ignores case.
	End Rem
	Field caseSensitive:Int = False

	Rem
	bbdoc: Allow dot (period) to match new lines as well as everything else.
	about: False indicates dot doesn't match new lines.
	End Rem
	Field dotMatchAll:Int = False

	Rem
	bbdoc: Greedy matches everything from the beginning of the first delimiter to the end of the last delimiter, and everything in between.
	about: Set to False to compile the pattern with the "ungreedy" option.
	End Rem
	Field greedy:Int = True

	Rem
	bbdoc: Determines how new lines are interpreted.
	about:
	<ul>
	<li>0 - any line ending</li>
	<li>1 - \r</li>
	<li>2 - \n</li>
	<li>3 - \r\n</li>
	</ul>
	End Rem
	Field lineEndType:Int = 0

	Rem
	bbdoc: Allow patterns to match empty strings.
	End Rem
	Field matchEmpty:Int = True

	'Rem
	'bbdoc: Indicates whether all occurrences of matches will be replaced.
	'about: Only applicable during a Replace.
	'End Rem
	Field replaceAllMatches:Int = False

	Rem
	bbdoc: Count the beginning of a string as the beginning of a line.
	End Rem
	Field stringIsLineBeginning:Int = True

	Rem
	bbdoc: Count the end of a string as the end of a line.
	End Rem
	Field stringIsLineEnding:Int = True

	Rem
	bbdoc: Matches internal new lines against ^ and $.
	about: Set to false to ignore internal new lines.
	End Rem
	Field targetIsMultiline:Int = True

	Rem
	bbdoc: Dollar ($) matches newline at end.
	about: Set to True for dollar to only match the end of the string, otherwise
	matches a newline before the end of the string.
	End Rem
	Field dollarEndOnly:Int = False

	Rem
	bbdoc: Ignore whitespace and # comments.
	about: When set to True, whitespace in the pattern (other than in a character class) and
	characters between a # outside a character class and the next newline are ignored.<br>
	An escaping backslash can be used to include a whitespace or # character as part of the pattern.
	End Rem
	Field extended:Int = False

	Rem
	bbdoc: Compile code for full matching.
	about: When set to True, the JIT compiler is enabled.
	End Rem
	Field jitComplete:Int = False

	Rem
	bbdoc: Compile code for soft partial matching.
	about: When set to True, the JIT compiler is enabled.
	End Rem
	Field jitPartialSoft:Int = False

	Rem
	bbdoc: Compile code for hard partial matching.
	about: When set to True, the JIT compiler is enabled.
	End Rem
	Field jitPartialHard:Int = False

End Type
Rem
bbdoc: A Regular Expression exception.
about: This can be thrown either during regular expression compilation (-99) or during a search (a negative PCRE2 error code).
End Rem
  694. Type TRegExException
	Rem
	bbdoc: The type of error thrown.
	about: -99 is a regular expression compile error. Read #message for details.<br>
	Any other negative value is a PCRE2 error code raised during a search. Read #message for details.
	End Rem
  700. Field num:Int
  701. Rem
  702. bbdoc: The error text.
  703. End Rem
  704. Field message:String
  705. Function Raise:TRegExException(num:Int, message:String = Null)
  706. Local this:TRegExException = New TRegExException
  707. If message Then
  708. this.message = message
  709. Else
  710. this.message = this.getFromNum(num)
  711. End If
  712. this.num = num
  713. Return this
  714. End Function
  715. Rem
  716. bbdoc: Returns the exception as a String.
  717. End Rem
  718. Method toString:String()
  719. Return "( " + num + " ) " + message
  720. End Method
  721. Method getFromNum:String(err:Int)
  722. Select err
  723. Case -1
  724. Return "No Match"
  725. Case -2
  726. Return "PCRE2_ERROR_PARTIAL"
  727. Case -3
  728. Return "PCRE2_ERROR_UTF8_ERR1"
  729. Case -4
  730. Return "PCRE2_ERROR_UTF8_ERR2"
  731. Case -5
  732. Return "PCRE2_ERROR_UTF8_ERR3"
  733. Case -6
  734. Return "PCRE2_ERROR_UTF8_ERR4"
  735. Case -7
  736. Return "PCRE2_ERROR_UTF8_ERR5"
  737. Case -8
  738. Return "PCRE2_ERROR_UTF8_ERR6"
  739. Case -9
  740. Return "PCRE2_ERROR_UTF8_ERR7"
  741. Case -10
  742. Return "PCRE2_ERROR_UTF8_ERR8"
  743. Case -11
  744. Return "PCRE2_ERROR_UTF8_ERR9"
  745. Case -12
  746. Return "PCRE2_ERROR_UTF8_ERR10"
  747. Case -13
  748. Return "PCRE2_ERROR_UTF8_ERR11"
  749. Case -14
  750. Return "PCRE2_ERROR_UTF8_ERR12"
  751. Case -15
  752. Return "PCRE2_ERROR_UTF8_ERR13"
  753. Case -16
  754. Return "PCRE2_ERROR_UTF8_ERR14"
  755. Case -17
  756. Return "PCRE2_ERROR_UTF8_ERR15"
  757. Case -18
  758. Return "PCRE2_ERROR_UTF8_ERR16"
  759. Case -19
  760. Return "PCRE2_ERROR_UTF8_ERR17"
  761. Case -20
  762. Return "PCRE2_ERROR_UTF8_ERR18"
  763. Case -21
  764. Return "PCRE2_ERROR_UTF8_ERR19"
  765. Case -22
  766. Return "PCRE2_ERROR_UTF8_ERR20"
  767. Case -23
  768. Return "PCRE2_ERROR_UTF8_ERR21"
  769. Case -24
  770. Return "PCRE2_ERROR_UTF16_ERR1"
  771. Case -25
  772. Return "PCRE2_ERROR_UTF16_ERR2"
  773. Case -26
  774. Return "PCRE2_ERROR_UTF16_ERR3"
  775. Case -27
  776. Return "PCRE2_ERROR_UTF32_ERR1"
  777. Case -28
  778. Return "PCRE2_ERROR_UTF32_ERR2"
  779. Case -29
  780. Return "PCRE2_ERROR_BADDATA"
  781. Case -30
  782. Return "PCRE2_ERROR_BADLENGTH"
  783. Case -31
  784. Return "PCRE2_ERROR_BADMAGIC"
  785. Case -32
  786. Return "PCRE2_ERROR_BADMODE"
  787. Case -33
  788. Return "PCRE2_ERROR_BADOFFSET"
  789. Case -34
  790. Return "PCRE2_ERROR_BADOPTION"
  791. Case -35
  792. Return "PCRE2_ERROR_BADREPLACEMENT"
  793. Case -36
  794. Return "PCRE2_ERROR_BADUTFOFFSET"
  795. Case -37
  796. Return "PCRE2_ERROR_CALLOUT"
  797. Case -38
  798. Return "PCRE2_ERROR_DFA_BADRESTART"
  799. Case -39
  800. Return "PCRE2_ERROR_DFA_RECURSE"
  801. Case -40
  802. Return "PCRE2_ERROR_DFA_UCOND"
  803. Case -41
  804. Return "PCRE2_ERROR_DFA_UFUNC"
  805. Case -42
  806. Return "PCRE2_ERROR_DFA_UITEM"
  807. Case -43
  808. Return "PCRE2_ERROR_DFA_WSSIZE"
  809. Case -44
  810. Return "PCRE2_ERROR_INTERNAL"
  811. Case -45
  812. Return "PCRE2_ERROR_JIT_BADOPTION"
  813. Case -46
  814. Return "PCRE2_ERROR_JIT_STACKLIMIT"
  815. Case -47
  816. Return "PCRE2_ERROR_MATCHLIMIT"
  817. Case -48
  818. Return "PCRE2_ERROR_NOMEMORY"
  819. Case -49
  820. Return "PCRE2_ERROR_NOSUBSTRING"
  821. Case -50
  822. Return "PCRE2_ERROR_NOUNIQUESUBSTRING"
  823. Case -51
  824. Return "PCRE2_ERROR_NULL"
  825. Case -52
  826. Return "PCRE2_ERROR_RECURSELOOP"
  827. Case -53
  828. Return "PCRE2_ERROR_RECURSIONLIMIT"
  829. Case -54
  830. Return "PCRE2_ERROR_UNAVAILABLE"
  831. Case -55
  832. Return "PCRE2_ERROR_UNSET"
  833. End Select
  834. End Method
  835. End Type