// sq_parsecsv.cpp (5.6 KB)
  #include "squirrel.h"
  #include <string.h>
  SQ_OPT_STRING_STRLEN();
  #include <algorithm>
  #include <fstream>
  #include <istream>
  #include <string>
  #include <vector>
  /// <summary>trims white space lying outside the quoted span of a field</summary>
  /// <remarks>
  /// * characters in [quotedBegin, quotedEnd) are never removed
  /// * pass std::string::npos for both indices when the field has no
  ///   quoted section; a field of only white space then becomes empty
  /// </remarks>
  /// <param name="field">field text, modified in place</param>
  /// <param name="quotedBegin">index of the first quoted character, or npos</param>
  /// <param name="quotedEnd">index one past the last quoted character, or npos</param>
  static void CsvTrimField(std::string& field,
                           std::string::size_type quotedBegin,
                           std::string::size_type quotedEnd)
  {
      static const char wsList[] = " \t\n\f\v\r";
      const std::string::size_type none = std::string::npos;

      // order dependency: right trim before left trim, because the left
      // trim shifts every index in the field (including quotedEnd)
      const std::string::size_type lastText = field.find_last_not_of(wsList);
      std::string::size_type keepEnd = (lastText == none) ? 0 : lastText + 1;
      if (quotedEnd != none) keepEnd = std::max(keepEnd, quotedEnd);
      field.erase(keepEnd);

      const std::string::size_type firstText = field.find_first_not_of(wsList);
      std::string::size_type keepBegin = (firstText == none) ? field.size() : firstText;
      if (quotedBegin != none) keepBegin = std::min(keepBegin, quotedBegin);
      field.erase(0, keepBegin);
  }

  /// <summary>loads a CSV record from the stream is</summary>
  /// <remarks>
  /// * leading and trailing white space is removed outside of
  ///   quoted sections when trimWhiteSpace is true
  /// * line breaks are preserved in quoted sections
  /// * quote literals consist of two adjacent quote characters
  /// * quote literals must be in quoted sections
  /// * a record that is ended by the end of the stream instead of
  ///   recordDelim still yields its last field
  /// * an unterminated quoted section runs to the end of the stream
  /// </remarks>
  /// <param name="is">input stream for CSV records</param>
  /// <param name="trimWhiteSpace">trims white space on unquoted fields</param>
  /// <param name="fieldDelim">field delimiter. defaults to ',' for CSV</param>
  /// <param name="recordDelim">record delimiter. defaults to '\n' for CSV</param>
  /// <param name="quote">delimiter for quoted fields. defaults to '"'</param>
  /// <returns>a list of fields in the record; empty when nothing could be read</returns>
  std::vector<std::string> CsvGetLine(std::istream& is,
                                      bool trimWhiteSpace=true,
                                      const char fieldDelim=',',
                                      const char recordDelim='\n',
                                      const char quote='"')
  {
      typedef std::string::size_type size_type;
      const size_type none = std::string::npos;
      // peek() reports characters as int_type, so compare in that domain
      const std::istream::int_type quoteCode =
          std::istream::traits_type::to_int_type(quote);

      std::vector<std::string> record; // result record list. default empty
      std::string field;               // temporary field construction zone
      size_type quotedBegin = none;    // start of the first quoted section
      size_type quotedEnd = none;      // end of the last quoted section
      bool terminated = false;         // true once recordDelim was consumed
      char ch;

      while (is.get(ch))
      {
          if (ch == fieldDelim || ch == recordDelim)
          {
              // fieldDelim and recordDelim mark the end of a field:
              // save the field and reset the state for the next one
              if (trimWhiteSpace) CsvTrimField(field, quotedBegin, quotedEnd);
              record.push_back(field);
              field.clear();
              quotedBegin = quotedEnd = none;

              if (ch == recordDelim)
              {
                  terminated = true;
                  break;
              }
          }
          else if (ch == quote)
          {
              // remember only the first opening quote so that text between
              // two quoted sections of one field stays protected as well
              if (quotedBegin == none) quotedBegin = field.size();

              while (is.get(ch))
              {
                  if (ch != quote)
                  {
                      field.push_back(ch);
                      continue;
                  }
                  // consecutive quotes are an escaped quote literal
                  // only applies in quoted fields
                  // 'a""b""c' becomes 'abc'
                  // 'a"""b"""c' becomes 'a"b"c'
                  // '"a""b""c"' becomes 'a"b"c'
                  if (is.peek() != quoteCode) break; // closing quote
                  is.get(ch);                        // consume the second quote
                  field.push_back(ch);
              }
              // also reached when the stream ends inside the quotes
              quotedEnd = field.size();
          }
          else field.push_back(ch);
      }

      if (!terminated)
      {
          // the stream ended without recordDelim: keep the pending field
          // unless nothing at all was collected for this record
          if (trimWhiteSpace) CsvTrimField(field, quotedBegin, quotedEnd);
          if (!record.empty() || !field.empty() || quotedBegin != none)
              record.push_back(field);
      }
      return record;
  }
  96. static const SQChar sq_parse_csv_TAG[] = _SC("ParseCSV");
  97. static SQRESULT sq_parse_csv_release_hook(SQUserPointer p, SQInteger size, void */*ep*/) {
  98. std::ifstream *self = (std::ifstream *)p;
  99. if(self) delete self;
  100. return 0;
  101. }
  102. /*
  103. ** Creates a new ParseCSV.
  104. */
  105. static SQRESULT sq_parse_csv_constructor (HSQUIRRELVM v) {
  106. SQ_FUNC_VARS_NO_TOP(v);
  107. SQ_GET_STRING(v, 2, fname);
  108. std::ifstream *self = new std::ifstream(fname);
  109. if(self->is_open()){
  110. sq_setinstanceup(v, 1, self);
  111. sq_setreleasehook(v, 1, sq_parse_csv_release_hook);
  112. return 1;
  113. }
  114. delete self;
  115. return sq_throwerror(v, _SC("failed to open %s"), fname);
  116. }
  117. static SQRESULT sq_parse_csv_next_row(HSQUIRRELVM v)
  118. {
  119. SQ_FUNC_VARS_NO_TOP(v);
  120. SQ_GET_INSTANCE_VAR(v, 1, std::ifstream, self, sq_parse_csv_TAG);
  121. std::vector<std::string> row_fields = CsvGetLine(*self);
  122. size_t rsize = row_fields.size();
  123. if (rsize == 0) sq_pushnull(v);
  124. else {
  125. sq_newarray(v, rsize);
  126. for(size_t i=0; i<rsize; ++i){
  127. sq_pushinteger(v, i);
  128. std::string &str = row_fields[i];
  129. sq_pushstring(v, str.c_str(), str.size());
  130. sq_rawset(v, -3);
  131. }
  132. }
  133. return 1;
  134. }
  135. #define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),sq_parse_csv_##name,nparams,tycheck}
  136. static SQRegFunction sq_parse_csv_methods[] =
  137. {
  138. _DECL_FUNC(constructor,2,_SC("xs")),
  139. _DECL_FUNC(next_row, 1,_SC("x")),
  140. {0,0}
  141. };
  142. #undef _DECL_FUNC
  143. #ifdef __cplusplus
  144. extern "C" {
  145. #endif
  146. /* This defines a function that opens up your library. */
  147. SQRESULT sqext_register_csv_parser (HSQUIRRELVM v) {
  148. sq_pushstring(v,sq_parse_csv_TAG,-1);
  149. sq_newclass(v,SQFalse);
  150. sq_settypetag(v,-1,(void*)sq_parse_csv_TAG);
  151. sq_insert_reg_funcs(v, sq_parse_csv_methods);
  152. sq_newslot(v,-3,SQTrue);
  153. return SQ_OK;
  154. }
  155. #ifdef __cplusplus
  156. }
  157. #endif