textstream.bmx 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. Strict
  2. Rem
  3. bbdoc: Streams/Text streams
  4. about:
  5. The Text Stream module allows you to load and save text in a number
  6. of formats: LATIN1, UTF8 and UTF16.
  7. The LATIN1 format uses a single byte to represent each character, and
  8. is therefore only capable of manipulating 256 distinct character values.
  9. The UTF8 and UTF16 formats are capable of manipulating up to 1114112
  10. character values, but will generally use greater storage space. In addition,
  11. many text processing applications are unable to handle UTF8 and UTF16 files.
  12. End Rem
  13. Module BRL.TextStream
  14. ModuleInfo "Version: 1.03 "
  15. ModuleInfo "Author: Mark Sibly"
  16. ModuleInfo "License: zlib/libpng"
  17. ModuleInfo "Copyright: Blitz Research Ltd"
  18. ModuleInfo "Modserver: BRL"
  19. ModuleInfo "History: 1.03 Release"
  20. ModuleInfo "History: Modified LoadText to handle stream URLs"
  21. ModuleInfo "History: 1.02 Release"
  22. ModuleInfo "History: Added LoadText, SaveText"
  23. ModuleInfo "History: Fixed UTF16LE=4"
  24. ModuleInfo "History: 1.01 Release"
  25. ModuleInfo "History: 1.00 Release"
  26. ModuleInfo "History: Added TextStream module"
  27. Import BRL.Stream
  Type TTextStream Extends TStreamWrapper

      ' Stream wrapper that performs its reads and writes as text, using the
      ' character encoding chosen when the wrapper is created (see Create).
      '
      ' Raw Read/Write data is stored as pairs of hexadecimal digits, with a
      ' line terminator after every 32 encoded bytes. Numeric values are
      ' stored as one line of text each.

      '***** PUBLIC *****

      ' Supported encodings, passed to Create.
      Const LATIN1=1
      Const UTF8=2
      Const UTF16BE=3
      Const UTF16LE=4

      ' Reads count bytes into buf. Each byte is decoded from two hexadecimal
      ' digit characters; once 32 bytes have been decoded from the current
      ' line, the line terminator is consumed via _FlushRead before the next
      ' byte. Always returns count.
      Method Read( buf:Byte Ptr,count )
          For Local i=0 Until count
              If _bufcount=32 _FlushRead
              Local hi=_ReadByte()
              Local lo=_ReadByte()
              ' "0".."9" are 48..57 and "A".."F" are 65..70, hence the extra -7.
              hi:-48;If hi>9 hi:-7
              lo:-48;If lo>9 lo:-7
              buf[i]=hi Shl 4 | lo
              _bufcount:+1
          Next
          Return count
      End Method

      ' Writes count bytes from buf as pairs of uppercase hexadecimal digits,
      ' emitting a line terminator after every 32 bytes. Always returns count.
      Method Write( buf:Byte Ptr,count )
          For Local i=0 Until count
              Local hi=buf[i] Shr 4
              Local lo=buf[i] & $f
              ' Map 0..9 to "0".."9" (48..57) and 10..15 to "A".."F" (65..70).
              hi:+48;If hi>57 hi:+7
              lo:+48;If lo>57 lo:+7
              _WriteByte hi
              _WriteByte lo
              _bufcount:+1
              If _bufcount=32 _FlushWrite
          Next
          Return count
      End Method

      ' The numeric readers below each finish any pending hex line, read one
      ' line of text and convert it to the requested type. The matching
      ' writers finish any pending hex line and write the value as one line.

      Method ReadByte()
          _FlushRead
          Return Int( ReadLine() )
      End Method

      Method WriteByte( n )
          _FlushWrite
          WriteLine n
      End Method

      Method ReadShort()
          _FlushRead
          Return Int( ReadLine() )
      End Method

      Method WriteShort( n )
          _FlushWrite
          WriteLine n
      End Method

      Method ReadInt()
          _FlushRead
          Return Int( ReadLine() )
      End Method

      Method WriteInt( n )
          _FlushWrite
          WriteLine n
      End Method

      Method ReadLong:Long()
          _FlushRead
          Return Long( ReadLine() )
      End Method

      Method WriteLong( n:Long )
          _FlushWrite
          WriteLine n
      End Method

      Method ReadFloat:Float()
          _FlushRead
          Return Float( ReadLine() )
      End Method

      Method WriteFloat( n:Float )
          _FlushWrite
          WriteLine n
      End Method

      Method ReadDouble:Double()
          _FlushRead
          Return Double( ReadLine() )
      End Method

      Method WriteDouble( n:Double )
          _FlushWrite
          WriteLine n
      End Method

      ' Reads characters until a line feed (10), a zero character or the end
      ' of the stream. Carriage returns (13) are skipped and the terminating
      ' character is not part of the returned string.
      Method ReadLine$()
          _FlushRead
          Local buf:Short[1024],i
          While Not Eof()
              Local n=ReadChar()
              If n=0 Exit
              If n=10 Exit
              If n=13 Continue
              ' Grow the buffer in 1024 character steps when it is full.
              If buf.length=i buf=buf[..i+1024]
              buf[i]=n
              i:+1
          Wend
          Return String.FromShorts(buf,i)
      End Method

      ' Reads every remaining character up to the end of the stream and
      ' returns them as one string, line terminators included.
      Method ReadFile$()
          _FlushRead
          Local buf:Short[1024],i
          While Not Eof()
              Local n=ReadChar()
              If buf.length=i buf=buf[..i+1024]
              buf[i]=n
              i:+1
          Wend
          Return String.FromShorts( buf,i )
      End Method

      ' Writes str followed by a carriage return / line feed pair.
      Method WriteLine( str$ )
          _FlushWrite
          WriteString str
          WriteString "~r~n"
      End Method

      ' Reads exactly length characters (not bytes) and returns them.
      Method ReadString$( length )
          _FlushRead
          Local buf:Short[length]
          For Local i=0 Until length
              buf[i]=ReadChar()
          Next
          Return String.FromShorts(buf,length)
      End Method

      ' Writes every character of str using WriteChar; no terminator is added.
      Method WriteString( str$ )
          _FlushWrite
          For Local i=0 Until str.length
              WriteChar str[i]
          Next
      End Method

      ' Reads one character in the stream's encoding and returns its value.
      '
      ' LATIN1 returns the byte itself. UTF16BE/UTF16LE return one 16 bit
      ' unit built from two bytes. UTF8 decodes 1, 2 and 3 byte sequences.
      ' A UTF8 lead byte of 240 or above starts a sequence whose value does
      ' not fit the 16 bit characters used by the buffers in this type, so
      ' the fourth byte is consumed to keep the stream aligned and the
      ' Unicode replacement character $fffd is returned instead.
      ' Returns 0 if the encoding is not one of the four constants.
      Method ReadChar()
          Local c=_ReadByte()
          Select _encoding
          Case LATIN1
              Return c
          Case UTF8
              If c<128 Return c
              Local d=_ReadByte()
              If c<224 Return (c-192)*64+(d-128)
              Local e=_ReadByte()
              If c<240 Return (c-224)*4096+(d-128)*64+(e-128)
              ' Previously this case fell out of the Select after three bytes
              ' and returned 0, which ended ReadLine early and left the fourth
              ' byte to be misread as the start of the next character.
              _ReadByte
              Return $fffd
          Case UTF16BE
              Local d=_ReadByte()
              Return c Shl 8 | d
          Case UTF16LE
              Local d=_ReadByte()
              Return d Shl 8 | c
          End Select
      End Method

      ' Writes one character in the stream's encoding. char must be in the
      ' range 0..$ffff (asserted). UTF8 output uses 1, 2 or 3 bytes.
      Method WriteChar( char )
          Assert char>=0 And char<=$ffff
          Select _encoding
          Case LATIN1
              _WriteByte char
          Case UTF8
              If char<128
                  _WriteByte char
              Else If char<2048
                  _WriteByte char/64 | 192
                  _WriteByte char Mod 64 | 128
              Else
                  _WriteByte char/4096 | 224
                  _WriteByte char/64 Mod 64 | 128
                  _WriteByte char Mod 64 | 128
              EndIf
          Case UTF16BE
              _WriteByte char Shr 8
              _WriteByte char
          Case UTF16LE
              _WriteByte char
              _WriteByte char Shr 8
          End Select
      End Method

      ' Creates a text stream that wraps stream and uses the given encoding
      ' (one of LATIN1, UTF8, UTF16BE, UTF16LE).
      Function Create:TTextStream( stream:TStream,encoding )
          Local t:TTextStream=New TTextStream
          t._encoding=encoding
          t.SetStream stream
          Return t
      End Function

      '***** PRIVATE *****

      ' Raw byte access that bypasses the text based ReadByte/WriteByte
      ' overrides of this type.
      Method _ReadByte()
          Return Super.ReadByte()
      End Method

      Method _WriteByte( n )
          Super.WriteByte n
      End Method

      ' If hex data has been read from the current line, consumes the line
      ' terminator (line feed, optionally preceded by a carriage return) and
      ' resets the per line counter. Throws a string if the terminator is
      ' missing.
      Method _FlushRead()
          If Not _bufcount Return
          Local n=_ReadByte()
          If n=13 n=_ReadByte()
          If n<>10 Throw "Malformed line terminator"
          _bufcount=0
      End Method

      ' If hex data has been written to the current line, terminates the
      ' line with carriage return / line feed and resets the counter.
      Method _FlushWrite()
          If Not _bufcount Return
          _WriteByte 13
          _WriteByte 10
          _bufcount=0
      End Method

      ' _encoding: one of the encoding constants above.
      ' _bufcount: number of hex encoded bytes on the current line (0..32).
      Field _encoding,_bufcount

  End Type
  Type TTextStreamFactory Extends TStreamFactory

      ' Maps the protocol names "latin1", "utf8", "utf16be" and "utf16le" to
      ' the matching TTextStream encoding, opens path with OpenStream and
      ' returns it wrapped in a TTextStream.
      ' Returns Null for any other protocol or when path cannot be opened.
      Method CreateStream:TStream( url:Object,proto$,path$,readable,writeable )
          Local format
          If proto="latin1"
              format=TTextStream.LATIN1
          Else If proto="utf8"
              format=TTextStream.UTF8
          Else If proto="utf16be"
              format=TTextStream.UTF16BE
          Else If proto="utf16le"
              format=TTextStream.UTF16LE
          EndIf

          ' Not one of our protocols: decline.
          If format=0 Return Null

          Local inner:TStream=OpenStream( path,readable,writeable )
          If inner=Null Return Null

          Return TTextStream.Create( inner,format )
      End Method

  End Type

  ' Instantiate the factory once when the module is loaded.
  ' NOTE(review): presumably TStreamFactory registers new instances with
  ' BRL.Stream on construction - confirm against BRL.Stream.
  New TTextStreamFactory
  Rem
  bbdoc: Load text from a stream
  returns: A string containing the text
  about:
  #LoadText reads all of the LATIN1, UTF8 or UTF16 text available from @url.
  The encoding is selected by the first bytes read from the stream:
  [ &$fe $ff | Text is big endian UTF16
  * &$ff $fe | Text is little endian UTF16
  * &$ef $bb $bf | Text is UTF8
  ]
  When the first bytes match none of these values, the stream is assumed to
  contain LATIN1 text and those bytes are kept as part of the text.
  A #TStreamReadException is thrown if not all bytes could be read.
  End Rem
  Function LoadText$( url:Object )
      Local stream:TStream=ReadStream( url )
      If Not stream Throw New TStreamReadException

      ' Sniff up to three leading bytes, remembering how many were consumed
      ' so that they can be copied into the result when they are plain text.
      Local b0,b1,b2,consumed,encoding
      If Not stream.Eof()
          b0=stream.ReadByte()
          consumed=1
          If Not stream.Eof()
              b1=stream.ReadByte()
              consumed=2
          EndIf
      EndIf

      If consumed=2
          If b0=$fe And b1=$ff
              encoding=TTextStream.UTF16BE
          Else If b0=$ff And b1=$fe
              encoding=TTextStream.UTF16LE
          Else If b0=$ef And b1=$bb
              ' The UTF8 mark needs a third byte to be confirmed.
              If Not stream.Eof()
                  b2=stream.ReadByte()
                  consumed=3
                  If b2=$bf encoding=TTextStream.UTF8
              EndIf
          EndIf
      EndIf

      ' A byte order mark was found: decode the rest through a text stream.
      If encoding
          Local reader:TTextStream=TTextStream.Create( stream,encoding )
          Local text$=reader.ReadFile()
          reader.Close
          stream.Close
          Return text
      EndIf

      ' No byte order mark: start the buffer with the sniffed bytes, then
      ' append the remainder of the stream, doubling the buffer when full.
      Local bytes:Byte[1024]
      bytes[0]=b0
      bytes[1]=b1
      bytes[2]=b2
      While Not stream.Eof()
          If consumed=bytes.length bytes=bytes[..consumed*2]
          Local tail:Byte Ptr=(Byte Ptr bytes)+consumed
          consumed:+stream.Read( tail,bytes.length-consumed )
      Wend
      stream.Close
      Return String.FromBytes( bytes,consumed )
  End Function
  Rem
  bbdoc: Save text to a stream
  about:
  #SaveText saves the characters in @str to @url.
  If @str contains any characters with a character code greater than 255,
  then @str is saved in UTF16 format, preceded by a byte order mark, using the
  byte order the program was compiled for. Otherwise, @str is saved in LATIN1 format.
  A #TStreamWriteException is thrown if not all bytes could be written.
  End Rem
  Function SaveText( str$,url:Object )
      ' Pick UTF16 as soon as one character does not fit in a single byte.
      Local format
      For Local i=0 Until str.length
          If str[i]>255
  ?BigEndian
              format=TTextStream.UTF16BE
  ?LittleEndian
              format=TTextStream.UTF16LE
  ?
              Exit
          EndIf
      Next

      ' Every character fits in one byte: save without a byte order mark.
      If Not format
          SaveString str,url
          Return True
      EndIf

      Local stream:TStream=WriteStream( url )
      If Not stream Throw New TStreamWriteException

      ' Write the byte order mark that LoadText uses to detect the encoding.
      Select format
      Case TTextStream.UTF8
          ' The UTF8 mark is three bytes long; the final $bf used to be
          ' missing here. format is never set to UTF8 by the loop above, so
          ' this only matters if UTF8 output is enabled later.
          stream.WriteByte $ef
          stream.WriteByte $bb
          stream.WriteByte $bf
      Case TTextStream.UTF16BE
          stream.WriteByte $fe
          stream.WriteByte $ff
      Case TTextStream.UTF16LE
          stream.WriteByte $ff
          stream.WriteByte $fe
      End Select

      Local TStream:TTextStream=TTextStream.Create( stream,format )
      TStream.WriteString str
      TStream.Close
      stream.Close
      Return True
  End Function