csv.bmx 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. ' Copyright (c) 2022-2024 Bruce A Henderson
  2. '
  3. ' Permission is hereby granted, free of charge, to any person obtaining a copy
  4. ' of this software and associated documentation files (the "Software"), to deal
  5. ' in the Software without restriction, including without limitation the rights
  6. ' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. ' copies of the Software, and to permit persons to whom the Software is
  8. ' furnished to do so, subject to the following conditions:
  9. '
  10. ' The above copyright notice and this permission notice shall be included in
  11. ' all copies or substantial portions of the Software.
  12. '
  13. ' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. ' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. ' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. ' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. ' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. ' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. ' THE SOFTWARE.
  20. '
  21. SuperStrict
  22. Rem
  23. bbdoc: A CSV parser.
  24. End Rem
  25. Module Text.CSV
  26. ModuleInfo "Version: 1.05"
  27. ModuleInfo "Author: Bruce A Henderson"
  28. ModuleInfo "License: MIT"
  29. ModuleInfo "zsv - Copyright (c) 2021 Guarnerix Inc dba Liquidaty"
  30. ModuleInfo "Copyright: 2022-2024 Bruce A Henderson"
  31. ModuleInfo "History: 1.05"
  32. ModuleInfo "History: Updated to zsv 0.3.9.e64543a"
  33. ModuleInfo "History: 1.04"
  34. ModuleInfo "History: Updated to zsv 0.3.8.93d349e"
  35. ModuleInfo "History: 1.03"
  36. ModuleInfo "History: Updated to zsv 0.3.6.fdcd18e"
  37. ModuleInfo "History: 1.02"
  38. ModuleInfo "History: Updated to zsv 0.3.5.fd7b11a"
  39. ModuleInfo "History: 1.01"
  40. ModuleInfo "History: Updated to latest zsv."
  41. ModuleInfo "History: 1.00"
  42. ModuleInfo "History: Initial Release"
  43. ModuleInfo "CC_OPTS: -DVERSION=\~qv0.3.5\~q"
  44. ModuleInfo "CC_OPTS: -DZSV_EXTRAS=1"
  45. Import "common.bmx"
  46. Rem
  47. bbdoc: Csv Parser
  48. about: The parser expects at least 1 header row. If a file does not have a header row, one can be
  49. provided by setting #TCsvOptions.insertHeaderRow with an appropriate value.
  50. End Rem
  51. Type TCsvParser
  52. Private
  53. Field zsvPtr:Byte Ptr
  54. Field opts:TCsvOpts
  55. Field stream:TStream
  56. Field shouldCloseStream:Int
  57. Field row:TCsvRow
  58. Method New(stream:TStream, options:TCsvOptions)
  59. If Not options Then
  60. options = New TCsvOptions
  61. End If
  62. Self.opts = New TCsvOpts(Self, options)
  63. Self.stream = stream
  64. zsvPtr = zsv_new(opts.optsPtr)
  65. row = New TCsvRow(zsvPtr)
  66. ReadHeader()
  67. End Method
  68. Method ReadHeader()
  69. If NextRow() = ECsvStatus.row Then
  70. Local count:Size_T = row.ColumnCount()
  71. Local header:TCsvHeader = New TCsvHeader(count)
  72. For Local i:Int = 0 Until count
  73. Local col:SCsvColumn = row.GetColumn(i)
  74. Local v:String = col.GetValue()
  75. If v Then
  76. header.Put(v, i)
  77. End If
  78. Next
  79. row.header = header
  80. End If
  81. End Method
  82. Public
  83. Rem
  84. bbdoc: Creates a new #TCsvParser instance, using the given path.
  85. End Rem
  86. Function Parse:TCsvParser(path:String, opts:TCsvOptions)
  87. Local stream:TStream = ReadStream(path)
  88. If stream Then
  89. Local csv:TCsvParser = Parse(stream, opts)
  90. csv.shouldCloseStream = True
  91. Return csv
  92. End If
  93. End Function
  94. Rem
  95. bbdoc: Creates a new #TCsvParser instance, using the provided #TStream.
  96. End Rem
  97. Function Parse:TCsvParser(stream:TStream, options:TCsvOptions = Null)
  98. Return New TCsvParser(stream, options)
  99. End Function
  100. Rem
  101. bbdoc: Fetches the next row.
  102. returns: Returns ECsvStatus.row when a row is retrieved.
  103. End Rem
  104. Method NextRow:ECsvStatus()
  105. Return zsv_next_row(zsvPtr)
  106. End Method
  107. Rem
  108. bbdoc: Returns the current row.
  109. about: The cell values are only valid until the next call to #NextRow.
  110. End Rem
  111. Method GetRow:TCsvRow()
  112. Return row
  113. End Method
  114. Rem
  115. bbdoc: Frees the parser and any associated data.
  116. End Rem
  117. Method Free()
  118. If zsvPtr Then
  119. If shouldCloseStream Then
  120. stream.Close()
  121. End If
  122. zsv_delete(zsvPtr)
  123. zsvPtr = Null
  124. End If
  125. End Method
  126. Method Delete()
  127. Free()
  128. End Method
  129. Private
  130. Method ReadStream:Size_T(data:Byte Ptr, n:Size_T, size:Size_T)
  131. Return stream.Read(data, n * size)
  132. End Method
  133. Function _ReadStream:Size_T(data:Byte Ptr, n:Size_T, size:Size_T, csv:TCsvParser) { nomangle }
  134. Return csv.ReadStream(data, n, size)
  135. End Function
  136. End Type
  137. Rem
  138. bbdoc: Options for customising the parser.
  139. End Rem
  140. Type TCsvOptions
  141. Rem
  142. bbdoc: Maximum number of columns to parse.
  143. about: Defaults to 1024.
  144. End Rem
  145. Field maxColumns:UInt = 1024
  146. Rem
  147. bbdoc: The delimiter.
  148. about: Typically a comma or tab. Can be any char other than newline, form feed or quote. Defaults to comma.
  149. End Rem
  150. Field delimiter:String = ","
  151. Rem
  152. bbdoc: When enabled, indicates that the parser should treat double-quotes just like any ordinary character.
  153. End Rem
  154. Field noQuotes:Int
  155. Rem
  156. bbdoc: If the actual data does not have a header row with column names, the caller should provide one (in CSV format) which will be treated as if it was the first row of data.
  157. End Rem
  158. Field insertHeaderRow:String
  159. Rem
  160. bbdoc: The number of rows that the header row spans.
  161. about: If 0 or 1, header is assumed to span 1 row otherwise, set to number > 1 to span multiple rows.
  162. End Rem
  163. Field headerSpan:UInt = 1
  164. Rem
  165. bbdoc: The number of rows to ignore before the initial row is processed.
  166. End Rem
  167. Field rowsToIgnore:UInt
  168. Rem
  169. bbdoc: By default, ignores empty header rows.
  170. about: To disable this behaviour, set to #True.
  171. End Rem
  172. Field keepEmptyHeaderRows:UInt
  173. End Type
  174. Private
  175. Type TCsvOpts
  176. Field optsPtr:Byte Ptr
  177. Field headerRow:Byte Ptr
  178. Method New(csv:TCsvParser, options:TCsvOptions)
  179. optsPtr = bmx_csv_opts_new(csv)
  180. If options.insertHeaderRow Then
  181. headerRow = options.insertHeaderRow.ToUTF8String()
  182. End If
  183. bmx_csv_opts_set_options(optsPtr, options.maxColumns, Asc(options.delimiter), options.noQuotes, headerRow, options.headerSpan, options.rowsToIgnore, options.keepEmptyHeaderRows)
  184. End Method
  185. Method Delete()
  186. If headerRow Then
  187. MemFree(headerRow)
  188. headerRow = Null
  189. End If
  190. If optsPtr Then
  191. bmx_csv_opts_free(optsPtr)
  192. optsPtr = Null
  193. End If
  194. End Method
  195. End Type
  196. Public
  197. Rem
  198. bbdoc: A Csv Row.
  199. End Rem
  200. Type TCsvRow
  201. Private
  202. Field zsvPtr:Byte Ptr
  203. Field header:TCsvHeader
  204. Method New(zsvPtr:Byte Ptr)
  205. Self.zsvPtr = zsvPtr
  206. End Method
  207. Public
  208. Rem
  209. bbdoc: Returns the number of columns in the row.
  210. End Rem
  211. Method ColumnCount:Size_T()
  212. Return zsv_cell_count(zsvPtr)
  213. End Method
  214. Rem
  215. bbdoc: Returns the column at the given index.
  216. End Rem
  217. Method GetColumn:SCsvColumn(index:Size_T)
  218. Return zsv_get_cell(zsvPtr, index)
  219. End Method
  220. Rem
  221. bbdoc: Returns the column at the given index.
  222. End Rem
  223. Method GetColumn:SCsvColumn(index:Int)
  224. Return zsv_get_cell(zsvPtr, Size_T(index))
  225. End Method
  226. Rem
  227. bbdoc: Returns the column with the given column @name.
  228. End Rem
  229. Method GetColumn:SCsvColumn(name:String)
  230. If header Then
  231. Local index:Int = header.IndexForName(name)
  232. If index >= 0 Then
  233. Return zsv_get_cell(zsvPtr, Size_T(index))
  234. End If
  235. End If
  236. Return Null
  237. End Method
  238. Rem
  239. bbdoc: Returns the value of the column at the given @index.
  240. End Rem
  241. Method GetValue:String(index:Size_T)
  242. Return zsv_get_cell(zsvPtr, index).GetValue()
  243. End Method
  244. Rem
  245. bbdoc: Returns the value of the given column @name.
  246. End Rem
  247. Method GetValue:String(name:String)
  248. Return GetColumn(name).GetValue()
  249. ENd Method
  250. Rem
  251. bbdoc: Returns the header.
  252. End Rem
  253. Method GetHeader:TCsvHeader()
  254. Return header
  255. End Method
  256. Method Abort()
  257. zsv_abort(zsvPtr)
  258. End Method
  259. End Type
  260. Rem
  261. bbdoc: A csv header.
  262. about: A header consists of 1 or more rows.
  263. End Rem
  264. Type TCsvHeader
  265. Field cols:String[]
  266. Field map:TStringMap = New TStringMap
  267. Private
  268. Method New(count:Size_T)
  269. cols = New String[count]
  270. End Method
  271. Method Put(col:String, index:Int)
  272. cols[index] = col
  273. map.Insert(col, New TInt(index))
  274. End Method
  275. Public
  276. Rem
  277. bbdoc: Returns the column index for the column header @name.
  278. End Rem
  279. Method IndexForName:Int(name:String)
  280. Local value:TInt = TInt(map.ValueForKey(name))
  281. If value Then
  282. Return value.value
  283. Else
  284. Return -1
  285. End If
  286. End Method
  287. Rem
  288. bbdoc: Returns the number of header columns.
  289. End Rem
  290. Method ColumnCount:Size_T()
  291. Return cols.Length
  292. End Method
  293. Rem
  294. bbdoc: Returns the header for the given @index.
  295. End Rem
  296. Method GetHeader:String(index:Size_T)
  297. If index < cols.Length Then
  298. Return cols[index]
  299. End If
  300. End Method
  301. End Type
  302. Private
  303. Type TInt
  304. Field value:Int
  305. Method New(value:Int)
  306. Self.value = value
  307. End Method
  308. End Type