_packer.pyx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. # coding: utf-8
  2. #cython: embedsignature=True
  3. from cpython cimport *
  4. from libc.stdlib cimport *
  5. from libc.string cimport *
  6. from libc.limits cimport *
  7. from libc.stdint cimport int8_t
  8. from msgpack.exceptions import PackValueError
  9. from msgpack import ExtType
  10. cdef extern from "pack.h":
  11. struct msgpack_packer:
  12. char* buf
  13. size_t length
  14. size_t buf_size
  15. bint use_bin_type
  16. int msgpack_pack_int(msgpack_packer* pk, int d)
  17. int msgpack_pack_nil(msgpack_packer* pk)
  18. int msgpack_pack_true(msgpack_packer* pk)
  19. int msgpack_pack_false(msgpack_packer* pk)
  20. int msgpack_pack_long(msgpack_packer* pk, long d)
  21. int msgpack_pack_long_long(msgpack_packer* pk, long long d)
  22. int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d)
  23. int msgpack_pack_float(msgpack_packer* pk, float d)
  24. int msgpack_pack_double(msgpack_packer* pk, double d)
  25. int msgpack_pack_array(msgpack_packer* pk, size_t l)
  26. int msgpack_pack_map(msgpack_packer* pk, size_t l)
  27. int msgpack_pack_raw(msgpack_packer* pk, size_t l)
  28. int msgpack_pack_bin(msgpack_packer* pk, size_t l)
  29. int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
  30. int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
# Default nesting budget for Packer._pack: each nested container level is
# packed with the budget reduced by one, and PackValueError is raised once
# the budget drops below zero.
cdef int DEFAULT_RECURSE_LIMIT=511
  32. cdef class Packer(object):
  33. """
  34. MessagePack Packer
  35. usage::
  36. packer = Packer()
  37. astream.write(packer.pack(a))
  38. astream.write(packer.pack(b))
  39. Packer's constructor has some keyword arguments:
  40. :param callable default:
  41. Convert user type to builtin type that Packer supports.
  42. See also simplejson's document.
  43. :param str encoding:
  44. Convert unicode to bytes with this encoding. (default: 'utf-8')
  45. :param str unicode_errors:
  46. Error handler for encoding unicode. (default: 'strict')
  47. :param bool use_single_float:
  48. Use single precision float type for float. (default: False)
  49. :param bool autoreset:
  50. Reset buffer after each pack and return it's content as `bytes`. (default: True).
  51. If set this to false, use `bytes()` to get content and `.reset()` to clear buffer.
  52. :param bool use_bin_type:
  53. Use bin type introduced in msgpack spec 2.0 for bytes.
  54. It also enable str8 type for unicode.
  55. """
  56. cdef msgpack_packer pk
  57. cdef object _default
  58. cdef object _bencoding
  59. cdef object _berrors
  60. cdef char *encoding
  61. cdef char *unicode_errors
  62. cdef bool use_float
  63. cdef bint autoreset
  64. def __cinit__(self):
  65. cdef int buf_size = 1024*1024
  66. self.pk.buf = <char*> malloc(buf_size);
  67. if self.pk.buf == NULL:
  68. raise MemoryError("Unable to allocate internal buffer.")
  69. self.pk.buf_size = buf_size
  70. self.pk.length = 0
  71. def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
  72. use_single_float=False, bint autoreset=1, bint use_bin_type=0):
  73. """
  74. """
  75. self.use_float = use_single_float
  76. self.autoreset = autoreset
  77. self.pk.use_bin_type = use_bin_type
  78. if default is not None:
  79. if not PyCallable_Check(default):
  80. raise TypeError("default must be a callable.")
  81. self._default = default
  82. if encoding is None:
  83. self.encoding = NULL
  84. self.unicode_errors = NULL
  85. else:
  86. if isinstance(encoding, unicode):
  87. self._bencoding = encoding.encode('ascii')
  88. else:
  89. self._bencoding = encoding
  90. self.encoding = PyBytes_AsString(self._bencoding)
  91. if isinstance(unicode_errors, unicode):
  92. self._berrors = unicode_errors.encode('ascii')
  93. else:
  94. self._berrors = unicode_errors
  95. self.unicode_errors = PyBytes_AsString(self._berrors)
  96. def __dealloc__(self):
  97. free(self.pk.buf);
  98. cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
  99. cdef long long llval
  100. cdef unsigned long long ullval
  101. cdef long longval
  102. cdef float fval
  103. cdef double dval
  104. cdef char* rawval
  105. cdef int ret
  106. cdef dict d
  107. cdef size_t L
  108. cdef int default_used = 0
  109. if nest_limit < 0:
  110. raise PackValueError("recursion limit exceeded.")
  111. while True:
  112. if o is None:
  113. ret = msgpack_pack_nil(&self.pk)
  114. elif isinstance(o, bool):
  115. if o:
  116. ret = msgpack_pack_true(&self.pk)
  117. else:
  118. ret = msgpack_pack_false(&self.pk)
  119. elif PyLong_Check(o):
  120. # PyInt_Check(long) is True for Python 3.
  121. # Sow we should test long before int.
  122. if o > 0:
  123. ullval = o
  124. ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
  125. else:
  126. llval = o
  127. ret = msgpack_pack_long_long(&self.pk, llval)
  128. elif PyInt_Check(o):
  129. longval = o
  130. ret = msgpack_pack_long(&self.pk, longval)
  131. elif PyFloat_Check(o):
  132. if self.use_float:
  133. fval = o
  134. ret = msgpack_pack_float(&self.pk, fval)
  135. else:
  136. dval = o
  137. ret = msgpack_pack_double(&self.pk, dval)
  138. elif PyBytes_Check(o):
  139. L = len(o)
  140. if L > (2**32)-1:
  141. raise ValueError("bytes is too large")
  142. rawval = o
  143. ret = msgpack_pack_bin(&self.pk, L)
  144. if ret == 0:
  145. ret = msgpack_pack_raw_body(&self.pk, rawval, L)
  146. elif PyUnicode_Check(o):
  147. if not self.encoding:
  148. raise TypeError("Can't encode unicode string: no encoding is specified")
  149. o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
  150. L = len(o)
  151. if L > (2**32)-1:
  152. raise ValueError("dict is too large")
  153. rawval = o
  154. ret = msgpack_pack_raw(&self.pk, len(o))
  155. if ret == 0:
  156. ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
  157. elif PyDict_CheckExact(o):
  158. d = <dict>o
  159. L = len(d)
  160. if L > (2**32)-1:
  161. raise ValueError("dict is too large")
  162. ret = msgpack_pack_map(&self.pk, L)
  163. if ret == 0:
  164. for k, v in d.iteritems():
  165. ret = self._pack(k, nest_limit-1)
  166. if ret != 0: break
  167. ret = self._pack(v, nest_limit-1)
  168. if ret != 0: break
  169. elif PyDict_Check(o):
  170. L = len(o)
  171. if L > (2**32)-1:
  172. raise ValueError("dict is too large")
  173. ret = msgpack_pack_map(&self.pk, L)
  174. if ret == 0:
  175. for k, v in o.items():
  176. ret = self._pack(k, nest_limit-1)
  177. if ret != 0: break
  178. ret = self._pack(v, nest_limit-1)
  179. if ret != 0: break
  180. elif isinstance(o, ExtType):
  181. # This should be before Tuple because ExtType is namedtuple.
  182. longval = o.code
  183. rawval = o.data
  184. L = len(o.data)
  185. if L > (2**32)-1:
  186. raise ValueError("EXT data is too large")
  187. ret = msgpack_pack_ext(&self.pk, longval, L)
  188. ret = msgpack_pack_raw_body(&self.pk, rawval, L)
  189. elif PyTuple_Check(o) or PyList_Check(o):
  190. L = len(o)
  191. if L > (2**32)-1:
  192. raise ValueError("list is too large")
  193. ret = msgpack_pack_array(&self.pk, L)
  194. if ret == 0:
  195. for v in o:
  196. ret = self._pack(v, nest_limit-1)
  197. if ret != 0: break
  198. elif not default_used and self._default:
  199. o = self._default(o)
  200. default_used = 1
  201. continue
  202. else:
  203. raise TypeError("can't serialize %r" % (o,))
  204. return ret
  205. cpdef pack(self, object obj):
  206. cdef int ret
  207. ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
  208. if ret == -1:
  209. raise MemoryError
  210. elif ret: # should not happen.
  211. raise TypeError
  212. if self.autoreset:
  213. buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
  214. self.pk.length = 0
  215. return buf
  216. def pack_ext_type(self, typecode, data):
  217. msgpack_pack_ext(&self.pk, typecode, len(data))
  218. msgpack_pack_raw_body(&self.pk, data, len(data))
  219. def pack_array_header(self, size_t size):
  220. if size > (2**32-1):
  221. raise ValueError
  222. cdef int ret = msgpack_pack_array(&self.pk, size)
  223. if ret == -1:
  224. raise MemoryError
  225. elif ret: # should not happen
  226. raise TypeError
  227. if self.autoreset:
  228. buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
  229. self.pk.length = 0
  230. return buf
  231. def pack_map_header(self, size_t size):
  232. if size > (2**32-1):
  233. raise ValueError
  234. cdef int ret = msgpack_pack_map(&self.pk, size)
  235. if ret == -1:
  236. raise MemoryError
  237. elif ret: # should not happen
  238. raise TypeError
  239. if self.autoreset:
  240. buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
  241. self.pk.length = 0
  242. return buf
  243. def pack_map_pairs(self, object pairs):
  244. """
  245. Pack *pairs* as msgpack map type.
  246. *pairs* should sequence of pair.
  247. (`len(pairs)` and `for k, v in pairs:` should be supported.)
  248. """
  249. cdef int ret = msgpack_pack_map(&self.pk, len(pairs))
  250. if ret == 0:
  251. for k, v in pairs:
  252. ret = self._pack(k)
  253. if ret != 0: break
  254. ret = self._pack(v)
  255. if ret != 0: break
  256. if ret == -1:
  257. raise MemoryError
  258. elif ret: # should not happen
  259. raise TypeError
  260. if self.autoreset:
  261. buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
  262. self.pk.length = 0
  263. return buf
  264. def reset(self):
  265. """Clear internal buffer."""
  266. self.pk.length = 0
  267. def bytes(self):
  268. """Return buffer content."""
  269. return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)