Port of Israel Ekpo's CSV parser library

Dependents:   parser_sample IoTGateway_Basic

Committer:
hlipka
Date:
Mon Jan 24 23:08:37 2011 +0000
Revision:
0:7c9aa931c67c
initial version

Who changed what in which revision?

User | Revision | Line number | New contents of line
hlipka 0:7c9aa931c67c 1 /*
hlipka 0:7c9aa931c67c 2
hlipka 0:7c9aa931c67c 3 Copyright (c) 2008 - 2009, Israel Ekpo
hlipka 0:7c9aa931c67c 4 All rights reserved.
hlipka 0:7c9aa931c67c 5
hlipka 0:7c9aa931c67c 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
hlipka 0:7c9aa931c67c 7
hlipka 0:7c9aa931c67c 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
hlipka 0:7c9aa931c67c 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
hlipka 0:7c9aa931c67c 10 * Neither the name of Israel Ekpo nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission.
hlipka 0:7c9aa931c67c 11
hlipka 0:7c9aa931c67c 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
hlipka 0:7c9aa931c67c 13
hlipka 0:7c9aa931c67c 14
hlipka 0:7c9aa931c67c 15 */
hlipka 0:7c9aa931c67c 16 /**
hlipka 0:7c9aa931c67c 17 * csv_parser Header File
hlipka 0:7c9aa931c67c 18 *
hlipka 0:7c9aa931c67c 19 * This object is used to parse text documents that are delimited by some
hlipka 0:7c9aa931c67c 20 * type of character. Some of the common ones use spaces, tabs, commas and semi-colons.
hlipka 0:7c9aa931c67c 21 *
hlipka 0:7c9aa931c67c 22 * This is a list of common characters encountered by this program
hlipka 0:7c9aa931c67c 23 *
hlipka 0:7c9aa931c67c 24 * This list was prepared from the data from http://www.asciitable.com
hlipka 0:7c9aa931c67c 25 *
hlipka 0:7c9aa931c67c 26 * @li DEC is how it would be represented in decimal form (base 10)
hlipka 0:7c9aa931c67c 27 * @li HEX is how it would be represented in hexadecimal format (base 16)
hlipka 0:7c9aa931c67c 28 *
hlipka 0:7c9aa931c67c 29 * @li DEC HEX Character Name
hlipka 0:7c9aa931c67c 30 * @li 0 0x00 null
hlipka 0:7c9aa931c67c 31 * @li 9 0x09 horizontal tab
hlipka 0:7c9aa931c67c 32 * @li 10 0x0A line feed, new line
hlipka 0:7c9aa931c67c 33 * @li 13 0x0D carriage return
hlipka 0:7c9aa931c67c 34 * @li 27 0x1B escape
hlipka 0:7c9aa931c67c 35 * @li 32 0x20 space
hlipka 0:7c9aa931c67c 36 * @li 34 0x22 double quote
hlipka 0:7c9aa931c67c 37 * @li 39 0x27 single quote
hlipka 0:7c9aa931c67c 38 * @li 44 0x2C comma
hlipka 0:7c9aa931c67c 39 * @li 92 0x5C backslash
hlipka 0:7c9aa931c67c 40 *
hlipka 0:7c9aa931c67c 41 * @author Israel Ekpo <israel.ekpo@israelekpo.com>
hlipka 0:7c9aa931c67c 42 */
hlipka 0:7c9aa931c67c 43
hlipka 0:7c9aa931c67c 44 #ifndef CSV_PARSER_HPP_INCLUDED
hlipka 0:7c9aa931c67c 45
hlipka 0:7c9aa931c67c 46 #define CSV_PARSER_HPP_INCLUDED
hlipka 0:7c9aa931c67c 47
/* Library version, split into its three components. */
#define LIBCSV_PARSER_MAJOR_VERSION  1
#define LIBCSV_PARSER_MINOR_VERSION  0
#define LIBCSV_PARSER_PATCH_VERSION  0

/*
 * The library version as one integer constant.
 * NOTE(review): presumably derived from the three components above; confirm
 * the encoding before bumping any of them.
 */
#define LIBCSV_PARSER_VERSION_NUMBER 10000
hlipka 0:7c9aa931c67c 55
hlipka 0:7c9aa931c67c 56 /* C++ header files */
hlipka 0:7c9aa931c67c 57 #include <string>
hlipka 0:7c9aa931c67c 58 #include <vector>
hlipka 0:7c9aa931c67c 59
hlipka 0:7c9aa931c67c 60 /* C header files */
hlipka 0:7c9aa931c67c 61 #include <cstdio>
hlipka 0:7c9aa931c67c 62 #include <cstring>
hlipka 0:7c9aa931c67c 63 #include <cstdlib>
hlipka 0:7c9aa931c67c 64
hlipka 0:7c9aa931c67c 65 using namespace std;
hlipka 0:7c9aa931c67c 66
/**
 * @typedef csv_row
 *
 * Data structure used to represent a record.
 *
 * This is an alias for std::vector<std::string>. The std:: qualification is
 * spelled out so that the alias does not depend on a using-directive being
 * in effect at this point of the header.
 */
typedef std::vector<std::string> csv_row;

/**
 * @typedef csv_row_ptr
 *
 * Pointer to a csv_row object.
 *
 * Expands to std::vector<std::string> *
 */
typedef csv_row * csv_row_ptr;
hlipka 0:7c9aa931c67c 84
/**
 * @enum enclosure_type_t
 *
 * Selects the mode in which the CSV file is parsed.
 *
 * @li ENCLOSURE_NONE (1): the fields are not wrapped in enclosure characters.
 * @li ENCLOSURE_REQUIRED (2): every field must be wrapped in enclosure characters.
 * @li ENCLOSURE_OPTIONAL (3): a field may or may not be wrapped in enclosure characters.
 *
 * ENCLOSURE_TYPE_BEGIN and ENCLOSURE_TYPE_END only bracket the valid values
 * and are never to be used as a parsing mode.
 */
enum enclosure_type_t
{
    ENCLOSURE_TYPE_BEGIN = 0,   /* marker below the first valid mode; not a mode */
    ENCLOSURE_NONE       = 1,   /* no enclosure characters                      */
    ENCLOSURE_REQUIRED   = 2,   /* all fields enclosed                          */
    ENCLOSURE_OPTIONAL   = 3,   /* fields optionally enclosed                   */
    ENCLOSURE_TYPE_END          /* marker above the last valid mode; not a mode */
};
hlipka 0:7c9aa931c67c 105
/**
 * @def CSV_PARSER_FREE_BUFFER_PTR(ptr)
 *
 * Used to deallocate buffer pointers.
 *
 * If the pointer is not null it is passed to free() and then reset to NULL;
 * a null pointer is left untouched.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement: it needs a trailing semicolon and can be used as the
 * unbraced branch of an if / else.
 *
 * @param ptr A modifiable pointer lvalue. It is evaluated more than once, so
 *            the argument must not have side effects.
 */
#define CSV_PARSER_FREE_BUFFER_PTR(ptr) \
    do \
    { \
        if ((ptr) != NULL) \
        { \
            std::free(ptr); \
            (ptr) = NULL; \
        } \
    } while (0)
hlipka 0:7c9aa931c67c 120
/**
 * @def CSV_PARSER_FREE_FILE_PTR(fptr)
 *
 * Used to close open file handles.
 *
 * If the stream pointer is not null it is passed to fclose() and then reset
 * to NULL; a null pointer is left untouched. The value returned by fclose()
 * is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement: it needs a trailing semicolon and can be used as the
 * unbraced branch of an if / else.
 *
 * @param fptr A modifiable FILE * lvalue. It is evaluated more than once, so
 *             the argument must not have side effects.
 */
#define CSV_PARSER_FREE_FILE_PTR(fptr) \
    do \
    { \
        if ((fptr) != NULL) \
        { \
            std::fclose(fptr); \
            (fptr) = NULL; \
        } \
    } while (0)
hlipka 0:7c9aa931c67c 135
hlipka 0:7c9aa931c67c 136 /**
hlipka 0:7c9aa931c67c 137 * @class csv_parser
hlipka 0:7c9aa931c67c 138 *
hlipka 0:7c9aa931c67c 139 * The csv_parser object
hlipka 0:7c9aa931c67c 140 *
hlipka 0:7c9aa931c67c 141 * Used to parse text files to extract records and fields.
hlipka 0:7c9aa931c67c 142 *
hlipka 0:7c9aa931c67c 143 * We are making the following assumptions :
hlipka 0:7c9aa931c67c 144 *
hlipka 0:7c9aa931c67c 145 * @li The record terminator is only one character in length.
hlipka 0:7c9aa931c67c 146 * @li The field terminator is only one character in length.
hlipka 0:7c9aa931c67c 147 * @li The fields are enclosed by single characters, if any.
hlipka 0:7c9aa931c67c 148 *
hlipka 0:7c9aa931c67c 149 * @li The parser can handle documents where fields are always enclosed, not enclosed at all or optionally enclosed.
hlipka 0:7c9aa931c67c 150 * @li When fields are strictly all enclosed, there is an assumption that any enclosure characters within the field are escaped by placing a backslash in front of the enclosure character.
hlipka 0:7c9aa931c67c 151 *
hlipka 0:7c9aa931c67c 152 * The CSV files can be parsed in 3 modes.
hlipka 0:7c9aa931c67c 153 * @li (a) No enclosures
hlipka 0:7c9aa931c67c 154 * @li (b) Fields always enclosed.
hlipka 0:7c9aa931c67c 155 * @li (c) Fields optionally enclosed.
hlipka 0:7c9aa931c67c 156 *
hlipka 0:7c9aa931c67c 157 * For option (c) when the enclosure character is optional, if an enclosure character is spotted at either the beginning
hlipka 0:7c9aa931c67c 158 * or the end of the string, it is assumed that the field is enclosed.
hlipka 0:7c9aa931c67c 159 *
hlipka 0:7c9aa931c67c 160 * The csv_parser::init() method can accept a character array as the path to the CSV file.
hlipka 0:7c9aa931c67c 161 * Since it is overloaded, it can also accept a FILE pointer to a stream that is already open for reading.
hlipka 0:7c9aa931c67c 162 *
hlipka 0:7c9aa931c67c 163 * The set_enclosed_char() method accepts the field enclosure character as the first parameter and the enclosure mode as the second parameter which
hlipka 0:7c9aa931c67c 164 * controls how the text file is going to be parsed.
hlipka 0:7c9aa931c67c 165 *
hlipka 0:7c9aa931c67c 166 * @see csv_parser::set_enclosed_char()
hlipka 0:7c9aa931c67c 167 * @see enclosure_type_t
hlipka 0:7c9aa931c67c 168 *
hlipka 0:7c9aa931c67c 169 * @todo Add ability to parse files where fields/columns are terminated by strings instead of just one char.
hlipka 0:7c9aa931c67c 170 * @todo Add ability to set strings where lines start by. Currently lines do not have any starting char or string.
hlipka 0:7c9aa931c67c 171 * @todo Add ability to set strings where line end by. Currently lines can only end with a single char.
hlipka 0:7c9aa931c67c 172 * @todo Add ability to accept other escape characters besides the backslash character 0x5C.
hlipka 0:7c9aa931c67c 173 * @todo More support for improperly formatted CSV data files.
hlipka 0:7c9aa931c67c 174 *
hlipka 0:7c9aa931c67c 175 * @author Israel Ekpo <israel.ekpo@israelekpo.com>
hlipka 0:7c9aa931c67c 176 */
hlipka 0:7c9aa931c67c 177 class csv_parser
hlipka 0:7c9aa931c67c 178 {
hlipka 0:7c9aa931c67c 179
hlipka 0:7c9aa931c67c 180 public :
hlipka 0:7c9aa931c67c 181
hlipka 0:7c9aa931c67c 182 /**
hlipka 0:7c9aa931c67c 183 * Class constructor
hlipka 0:7c9aa931c67c 184 *
hlipka 0:7c9aa931c67c 185 * This is the default constructor.
hlipka 0:7c9aa931c67c 186 *
hlipka 0:7c9aa931c67c 187 * All the internal attributes are initialized here
hlipka 0:7c9aa931c67c 188 *
hlipka 0:7c9aa931c67c 189 * @li The enclosure character is initialized to NULL 0x00.
hlipka 0:7c9aa931c67c 190 * @li The escape character is initialized to the backslash character 0x5C.
hlipka 0:7c9aa931c67c 191 * @li The field delimiter character is initialized to a comma 0x2C.
hlipka 0:7c9aa931c67c 192 * @li The record delimiter character is initialized to a new line character 0x0A.
hlipka 0:7c9aa931c67c 193 *
hlipka 0:7c9aa931c67c 194 * @li The lengths of all the above-mentioned fields are initialized to 0,1,1 and 1 respectively.
hlipka 0:7c9aa931c67c 195 * @li The number of records to ignore is set to zero.
hlipka 0:7c9aa931c67c 196 * @li The more_rows internal attribute is set to false.
hlipka 0:7c9aa931c67c 197 * @li The pointer to the CSV input file is initialized to NULL
hlipka 0:7c9aa931c67c 198 * @li The pointer to the buffer for the file name is also initialized to NULL
hlipka 0:7c9aa931c67c 199 */
hlipka 0:7c9aa931c67c 200 csv_parser() : enclosed_char(0x00), escaped_char(0x5C),
hlipka 0:7c9aa931c67c 201 field_term_char(0x2C), line_term_char(0x0A),
hlipka 0:7c9aa931c67c 202 enclosed_length(0U), escaped_length(1U),
hlipka 0:7c9aa931c67c 203 field_term_length(1U), line_term_length(1U),
hlipka 0:7c9aa931c67c 204 ignore_num_lines(0U), record_count(0U),
hlipka 0:7c9aa931c67c 205 input_fp(NULL), input_filename(NULL),
hlipka 0:7c9aa931c67c 206 enclosure_type(ENCLOSURE_NONE),
hlipka 0:7c9aa931c67c 207 more_rows(false)
hlipka 0:7c9aa931c67c 208 { }
hlipka 0:7c9aa931c67c 209
hlipka 0:7c9aa931c67c 210 /**
hlipka 0:7c9aa931c67c 211 * Class destructor
hlipka 0:7c9aa931c67c 212 *
hlipka 0:7c9aa931c67c 213 * In the class destructor the file pointer to the input CSV file is closed and
hlipka 0:7c9aa931c67c 214 * the buffer to the input file name is also deallocated.
hlipka 0:7c9aa931c67c 215 *
hlipka 0:7c9aa931c67c 216 * @see csv_parser::input_fp
hlipka 0:7c9aa931c67c 217 * @see csv_parser::input_filename
hlipka 0:7c9aa931c67c 218 */
hlipka 0:7c9aa931c67c 219 ~csv_parser()
hlipka 0:7c9aa931c67c 220 {
hlipka 0:7c9aa931c67c 221 CSV_PARSER_FREE_FILE_PTR(input_fp);
hlipka 0:7c9aa931c67c 222
hlipka 0:7c9aa931c67c 223 CSV_PARSER_FREE_BUFFER_PTR(input_filename);
hlipka 0:7c9aa931c67c 224 }
hlipka 0:7c9aa931c67c 225
hlipka 0:7c9aa931c67c 226 /**
hlipka 0:7c9aa931c67c 227 * Initializes the current object
hlipka 0:7c9aa931c67c 228 *
hlipka 0:7c9aa931c67c 229 * This init method accepts a pointer to the CSV file that has been opened for reading
hlipka 0:7c9aa931c67c 230 *
hlipka 0:7c9aa931c67c 231 * It also resets the file pointer to the beginning of the stream
hlipka 0:7c9aa931c67c 232 *
hlipka 0:7c9aa931c67c 233 * @overload bool init(FILE * input_file_pointer)
hlipka 0:7c9aa931c67c 234 * @param[in] input_file_pointer
hlipka 0:7c9aa931c67c 235 * @return bool Returns true on success and false on error.
hlipka 0:7c9aa931c67c 236 */
hlipka 0:7c9aa931c67c 237 bool init(FILE * input_file_pointer);
hlipka 0:7c9aa931c67c 238
hlipka 0:7c9aa931c67c 239 /**
hlipka 0:7c9aa931c67c 240 * Initializes the current object
hlipka 0:7c9aa931c67c 241 *
hlipka 0:7c9aa931c67c 242 * @li This init method accepts a character array as the path to the csv file.
hlipka 0:7c9aa931c67c 243 * @li It sets the value of the csv_parser::input_filename property.
hlipka 0:7c9aa931c67c 244 * @li Then it creates a pointer to the csv_parser::input_fp property.
hlipka 0:7c9aa931c67c 245 *
hlipka 0:7c9aa931c67c 246 * @overload bool init(const char * input_filename)
hlipka 0:7c9aa931c67c 247 * @param[in] input_filename
hlipka 0:7c9aa931c67c 248 * @return bool Returns true on success and false on error.
hlipka 0:7c9aa931c67c 249 */
hlipka 0:7c9aa931c67c 250 bool init(const char * input_filename);
hlipka 0:7c9aa931c67c 251
hlipka 0:7c9aa931c67c 252 /**
hlipka 0:7c9aa931c67c 253 * Defines the Field Enclosure character used in the Text File
hlipka 0:7c9aa931c67c 254 *
hlipka 0:7c9aa931c67c 255 * Setting this to NULL means that the enclosure character is optional.
hlipka 0:7c9aa931c67c 256 *
hlipka 0:7c9aa931c67c 257 * If the enclosure is optional, there could be fields that are enclosed, and fields that are not enclosed within the same line/record.
hlipka 0:7c9aa931c67c 258 *
hlipka 0:7c9aa931c67c 259 * @param[in] fields_enclosed_by The character used to enclose the fields.
hlipka 0:7c9aa931c67c 260 * @param[in] enclosure_mode How the CSV file should be parsed.
hlipka 0:7c9aa931c67c 261 * @return void
hlipka 0:7c9aa931c67c 262 */
hlipka 0:7c9aa931c67c 263 void set_enclosed_char(char fields_enclosed_by, enclosure_type_t enclosure_mode);
hlipka 0:7c9aa931c67c 264
hlipka 0:7c9aa931c67c 265 /**
hlipka 0:7c9aa931c67c 266 * Defines the Field Delimiter character used in the text file
hlipka 0:7c9aa931c67c 267 *
hlipka 0:7c9aa931c67c 268 * @param[in] fields_terminated_by
hlipka 0:7c9aa931c67c 269 * @return void
hlipka 0:7c9aa931c67c 270 */
hlipka 0:7c9aa931c67c 271 void set_field_term_char(char fields_terminated_by);
hlipka 0:7c9aa931c67c 272
hlipka 0:7c9aa931c67c 273 /**
hlipka 0:7c9aa931c67c 274 * Defines the Record Terminator character used in the text file
hlipka 0:7c9aa931c67c 275 *
hlipka 0:7c9aa931c67c 276 * @param[in] lines_terminated_by
hlipka 0:7c9aa931c67c 277 * @return void
hlipka 0:7c9aa931c67c 278 */
hlipka 0:7c9aa931c67c 279 void set_line_term_char(char lines_terminated_by);
hlipka 0:7c9aa931c67c 280
hlipka 0:7c9aa931c67c 281 /**
hlipka 0:7c9aa931c67c 282 * Returns whether there is still more data
hlipka 0:7c9aa931c67c 283 *
hlipka 0:7c9aa931c67c 284 * This method returns a boolean value indicating whether or not there are
hlipka 0:7c9aa931c67c 285 * still more records to be extracted in the current file being parsed.
hlipka 0:7c9aa931c67c 286 *
hlipka 0:7c9aa931c67c 287 * Call this method to see if there are more rows to retrieve before invoking csv_parser::get_row()
hlipka 0:7c9aa931c67c 288 *
hlipka 0:7c9aa931c67c 289 * @see csv_parser::get_row()
hlipka 0:7c9aa931c67c 290 * @see csv_parser::more_rows
hlipka 0:7c9aa931c67c 291 *
hlipka 0:7c9aa931c67c 292 * @return bool Returns true if there are still more rows and false if there is not.
hlipka 0:7c9aa931c67c 293 */
hlipka 0:7c9aa931c67c 294 bool has_more_rows(void)
hlipka 0:7c9aa931c67c 295 {
hlipka 0:7c9aa931c67c 296 return more_rows;
hlipka 0:7c9aa931c67c 297 }
hlipka 0:7c9aa931c67c 298
hlipka 0:7c9aa931c67c 299 /**
hlipka 0:7c9aa931c67c 300 * Defines the number of records to discard
hlipka 0:7c9aa931c67c 301 *
hlipka 0:7c9aa931c67c 302 * The number of records specified will be discarded during the parsing process.
hlipka 0:7c9aa931c67c 303 *
hlipka 0:7c9aa931c67c 304 * @see csv_parser::_skip_lines()
hlipka 0:7c9aa931c67c 305 * @see csv_parser::get_row()
hlipka 0:7c9aa931c67c 306 * @see csv_parser::has_more_rows()
hlipka 0:7c9aa931c67c 307 *
hlipka 0:7c9aa931c67c 308 * @param[in] lines_to_skip How many records should be skipped
hlipka 0:7c9aa931c67c 309 * @return void
hlipka 0:7c9aa931c67c 310 */
hlipka 0:7c9aa931c67c 311 void set_skip_lines(unsigned int lines_to_skip)
hlipka 0:7c9aa931c67c 312 {
hlipka 0:7c9aa931c67c 313 ignore_num_lines = lines_to_skip;
hlipka 0:7c9aa931c67c 314 }
hlipka 0:7c9aa931c67c 315
hlipka 0:7c9aa931c67c 316 /**
hlipka 0:7c9aa931c67c 317 * Return the current row from the CSV file
hlipka 0:7c9aa931c67c 318 *
hlipka 0:7c9aa931c67c 319 * The row is returned as a vector of string objects.
hlipka 0:7c9aa931c67c 320 *
hlipka 0:7c9aa931c67c 321 * This method should be called only if csv_parser::has_more_rows() is true
hlipka 0:7c9aa931c67c 322 *
hlipka 0:7c9aa931c67c 323 * @see csv_parser::has_more_rows()
hlipka 0:7c9aa931c67c 324 * @see csv_parser::get_record_count()
hlipka 0:7c9aa931c67c 325 * @see csv_parser::reset_record_count()
hlipka 0:7c9aa931c67c 326 * @see csv_parser::more_rows
hlipka 0:7c9aa931c67c 327 *
hlipka 0:7c9aa931c67c 328 * @return csv_row A vector type containing an array of strings
hlipka 0:7c9aa931c67c 329 */
hlipka 0:7c9aa931c67c 330 csv_row get_row(void);
hlipka 0:7c9aa931c67c 331
hlipka 0:7c9aa931c67c 332 /**
hlipka 0:7c9aa931c67c 333 * Returns the number of times the csv_parser::get_row() method has been invoked
hlipka 0:7c9aa931c67c 334 *
hlipka 0:7c9aa931c67c 335 * @see csv_parser::reset_record_count()
hlipka 0:7c9aa931c67c 336 * @return unsigned int The number of times the csv_parser::get_row() method has been invoked.
hlipka 0:7c9aa931c67c 337 */
hlipka 0:7c9aa931c67c 338 unsigned int get_record_count(void)
hlipka 0:7c9aa931c67c 339 {
hlipka 0:7c9aa931c67c 340 return record_count;
hlipka 0:7c9aa931c67c 341 }
hlipka 0:7c9aa931c67c 342
hlipka 0:7c9aa931c67c 343 /**
hlipka 0:7c9aa931c67c 344 * Resets the record_count internal attribute to zero
hlipka 0:7c9aa931c67c 345 *
hlipka 0:7c9aa931c67c 346 * This may be used if the object is reused multiple times.
hlipka 0:7c9aa931c67c 347 *
hlipka 0:7c9aa931c67c 348 * @see csv_parser::record_count
hlipka 0:7c9aa931c67c 349 * @see csv_parser::get_record_count()
hlipka 0:7c9aa931c67c 350 * @return void
hlipka 0:7c9aa931c67c 351 */
hlipka 0:7c9aa931c67c 352 void reset_record_count(void)
hlipka 0:7c9aa931c67c 353 {
hlipka 0:7c9aa931c67c 354 record_count = 0U;
hlipka 0:7c9aa931c67c 355 }
hlipka 0:7c9aa931c67c 356
hlipka 0:7c9aa931c67c 357 private :
hlipka 0:7c9aa931c67c 358
hlipka 0:7c9aa931c67c 359 /**
hlipka 0:7c9aa931c67c 360 * Ignores N records in the CSV file
hlipka 0:7c9aa931c67c 361 *
hlipka 0:7c9aa931c67c 362 * Where N is the value of the csv_parser::ignore_num_lines internal property.
hlipka 0:7c9aa931c67c 363 *
hlipka 0:7c9aa931c67c 364 * The number of lines skipped can be defined by csv_parser::set_skip_lines()
hlipka 0:7c9aa931c67c 365 *
hlipka 0:7c9aa931c67c 366 * @see csv_parser::set_skip_lines()
hlipka 0:7c9aa931c67c 367 *
hlipka 0:7c9aa931c67c 368 * @return void
hlipka 0:7c9aa931c67c 369 */
hlipka 0:7c9aa931c67c 370 void _skip_lines(void);
hlipka 0:7c9aa931c67c 371
hlipka 0:7c9aa931c67c 372 /**
hlipka 0:7c9aa931c67c 373 * Reads a Single Line
hlipka 0:7c9aa931c67c 374 *
hlipka 0:7c9aa931c67c 375 * Reads a single record into the buffer passed by reference to the method
hlipka 0:7c9aa931c67c 376 *
hlipka 0:7c9aa931c67c 377 * @param[in,out] buffer A pointer to a character array for the current line.
hlipka 0:7c9aa931c67c 378 * @param[out] buffer_len A pointer to an integer storing the length of the buffer.
hlipka 0:7c9aa931c67c 379 * @return void
hlipka 0:7c9aa931c67c 380 */
hlipka 0:7c9aa931c67c 381 void _read_single_line(char ** buffer, unsigned int * buffer_len);
hlipka 0:7c9aa931c67c 382
hlipka 0:7c9aa931c67c 383 /**
hlipka 0:7c9aa931c67c 384 * Extracts the fields without enclosures
hlipka 0:7c9aa931c67c 385 *
hlipka 0:7c9aa931c67c 386 * This is used when the enclosure character is not set
hlipka 0:7c9aa931c67c 387 * @param[out] row The vector of strings
hlipka 0:7c9aa931c67c 388 * @param[in] line The character array buffer containing the current record/line
hlipka 0:7c9aa931c67c 389 * @param[in] line_length The length of the buffer
hlipka 0:7c9aa931c67c 390 */
hlipka 0:7c9aa931c67c 391 void _get_fields_without_enclosure(csv_row_ptr row, const char * line, const unsigned int * line_length);
hlipka 0:7c9aa931c67c 392
hlipka 0:7c9aa931c67c 393 /**
hlipka 0:7c9aa931c67c 394 * Extracts the fields with enclosures
hlipka 0:7c9aa931c67c 395 *
hlipka 0:7c9aa931c67c 396 * This is used when the enclosure character is set.
hlipka 0:7c9aa931c67c 397 *
hlipka 0:7c9aa931c67c 398 * @param[out] row The vector of strings
hlipka 0:7c9aa931c67c 399 * @param[in] line The character array buffer containing the current record/line
hlipka 0:7c9aa931c67c 400 * @param[in] line_length The length of the buffer
hlipka 0:7c9aa931c67c 401 */
hlipka 0:7c9aa931c67c 402 void _get_fields_with_enclosure(csv_row_ptr row, const char * line, const unsigned int * line_length);
hlipka 0:7c9aa931c67c 403
hlipka 0:7c9aa931c67c 404 /**
hlipka 0:7c9aa931c67c 405 * Extracts the fields when enclosure is optional
hlipka 0:7c9aa931c67c 406 *
hlipka 0:7c9aa931c67c 407 * This is used when the enclosure character is optional
hlipka 0:7c9aa931c67c 408 *
hlipka 0:7c9aa931c67c 409 * Hence, there could be fields that use it, and fields that don't.
hlipka 0:7c9aa931c67c 410 *
hlipka 0:7c9aa931c67c 411 * @param[out] row The vector of strings
hlipka 0:7c9aa931c67c 412 * @param[in] line The character array buffer containing the current record/line
hlipka 0:7c9aa931c67c 413 * @param[in] line_length The length of the buffer
hlipka 0:7c9aa931c67c 414 */
hlipka 0:7c9aa931c67c 415 void _get_fields_with_optional_enclosure(csv_row_ptr row, const char * line, const unsigned int * line_length);
hlipka 0:7c9aa931c67c 416
hlipka 0:7c9aa931c67c 417 protected :
hlipka 0:7c9aa931c67c 418
hlipka 0:7c9aa931c67c 419 /**
hlipka 0:7c9aa931c67c 420 * The enclosure character
hlipka 0:7c9aa931c67c 421 *
hlipka 0:7c9aa931c67c 422 * If present or used for a field it is assumed that both ends of the fields are wrapped.
hlipka 0:7c9aa931c67c 423 *
hlipka 0:7c9aa931c67c 424 * This is that single character used in the document to wrap the fields.
hlipka 0:7c9aa931c67c 425 *
hlipka 0:7c9aa931c67c 426 * @see csv_parser::_get_fields_without_enclosure()
hlipka 0:7c9aa931c67c 427 * @see csv_parser::_get_fields_with_enclosure()
hlipka 0:7c9aa931c67c 428 * @see csv_parser::_get_fields_with_optional_enclosure()
hlipka 0:7c9aa931c67c 429 *
hlipka 0:7c9aa931c67c 430 * @var enclosed_char
hlipka 0:7c9aa931c67c 431 */
hlipka 0:7c9aa931c67c 432 char enclosed_char;
hlipka 0:7c9aa931c67c 433
hlipka 0:7c9aa931c67c 434 /**
hlipka 0:7c9aa931c67c 435 * The escape character
hlipka 0:7c9aa931c67c 436 *
hlipka 0:7c9aa931c67c 437 * For now the only valid escape character allowed is the backslash character 0x5C
hlipka 0:7c9aa931c67c 438 *
hlipka 0:7c9aa931c67c 439 * This is only important when the enclosure character is required or optional.
hlipka 0:7c9aa931c67c 440 *
hlipka 0:7c9aa931c67c 441 * This is the backslash character used to escape enclosure characters found within the fields.
hlipka 0:7c9aa931c67c 442 *
hlipka 0:7c9aa931c67c 443 * @see csv_parser::_get_fields_with_enclosure()
hlipka 0:7c9aa931c67c 444 * @see csv_parser::_get_fields_with_optional_enclosure()
hlipka 0:7c9aa931c67c 445 * @todo Update the code to accept other escape characters besides the backslash
hlipka 0:7c9aa931c67c 446 *
hlipka 0:7c9aa931c67c 447 * @var escaped_char
hlipka 0:7c9aa931c67c 448 */
hlipka 0:7c9aa931c67c 449 char escaped_char;
hlipka 0:7c9aa931c67c 450
hlipka 0:7c9aa931c67c 451 /**
hlipka 0:7c9aa931c67c 452 * The field terminator
hlipka 0:7c9aa931c67c 453 *
hlipka 0:7c9aa931c67c 454 * This is the single character used to mark the end of a column in the text file.
hlipka 0:7c9aa931c67c 455 *
hlipka 0:7c9aa931c67c 456 * Common characters used include the comma, tab, and semi-colons.
hlipka 0:7c9aa931c67c 457 *
hlipka 0:7c9aa931c67c 458 * This is the single character used to separate fields within a record.
hlipka 0:7c9aa931c67c 459 *
hlipka 0:7c9aa931c67c 460 * @var field_term_char
hlipka 0:7c9aa931c67c 461 */
hlipka 0:7c9aa931c67c 462 char field_term_char;
hlipka 0:7c9aa931c67c 463
hlipka 0:7c9aa931c67c 464 /**
hlipka 0:7c9aa931c67c 465 * The record terminator
hlipka 0:7c9aa931c67c 466 *
hlipka 0:7c9aa931c67c 467 * This is the single character used to mark the end of a record in the text file.
hlipka 0:7c9aa931c67c 468 *
hlipka 0:7c9aa931c67c 469 * The most popular one is the new line character however it is possible to use others as well.
hlipka 0:7c9aa931c67c 470 *
hlipka 0:7c9aa931c67c 471 * This is the single character used to mark the end of a record
hlipka 0:7c9aa931c67c 472 *
hlipka 0:7c9aa931c67c 473 * @see csv_parser::get_row()
hlipka 0:7c9aa931c67c 474 *
hlipka 0:7c9aa931c67c 475 * @var line_term_char
hlipka 0:7c9aa931c67c 476 */
hlipka 0:7c9aa931c67c 477 char line_term_char;
hlipka 0:7c9aa931c67c 478
hlipka 0:7c9aa931c67c 479 /**
hlipka 0:7c9aa931c67c 480 * Enclosure length
hlipka 0:7c9aa931c67c 481 *
hlipka 0:7c9aa931c67c 482 * This is the length of the enclosure character
hlipka 0:7c9aa931c67c 483 *
hlipka 0:7c9aa931c67c 484 * @see csv_parser::csv_parser()
hlipka 0:7c9aa931c67c 485 * @see csv_parser::set_enclosed_char()
hlipka 0:7c9aa931c67c 486 *
hlipka 0:7c9aa931c67c 487 * @var enclosed_length
hlipka 0:7c9aa931c67c 488 */
hlipka 0:7c9aa931c67c 489 unsigned int enclosed_length;
hlipka 0:7c9aa931c67c 490
hlipka 0:7c9aa931c67c 491 /**
hlipka 0:7c9aa931c67c 492 * The length of the escape character
hlipka 0:7c9aa931c67c 493 *
hlipka 0:7c9aa931c67c 494 * Right now this is really not being used.
hlipka 0:7c9aa931c67c 495 *
hlipka 0:7c9aa931c67c 496 * It may be used in future versions of the object.
hlipka 0:7c9aa931c67c 497 *
hlipka 0:7c9aa931c67c 498 * @todo Update the code to accept other escape characters besides the backslash
hlipka 0:7c9aa931c67c 499 *
hlipka 0:7c9aa931c67c 500 * @var escaped_length
hlipka 0:7c9aa931c67c 501 */
hlipka 0:7c9aa931c67c 502 unsigned int escaped_length;
hlipka 0:7c9aa931c67c 503
hlipka 0:7c9aa931c67c 504 /**
hlipka 0:7c9aa931c67c 505 * Length of the field terminator
hlipka 0:7c9aa931c67c 506 *
hlipka 0:7c9aa931c67c 507 * For now this is not being used. It will be used in future versions of the object.
hlipka 0:7c9aa931c67c 508 *
hlipka 0:7c9aa931c67c 509 * @var field_term_length
hlipka 0:7c9aa931c67c 510 */
hlipka 0:7c9aa931c67c 511 unsigned int field_term_length;
hlipka 0:7c9aa931c67c 512
hlipka 0:7c9aa931c67c 513 /**
hlipka 0:7c9aa931c67c 514 * Length of the record terminator
hlipka 0:7c9aa931c67c 515 *
hlipka 0:7c9aa931c67c 516 * For now this is not being used. It will be used in future versions of the object.
hlipka 0:7c9aa931c67c 517 *
hlipka 0:7c9aa931c67c 518 * @var line_term_length
hlipka 0:7c9aa931c67c 519 */
hlipka 0:7c9aa931c67c 520 unsigned int line_term_length;
hlipka 0:7c9aa931c67c 521
hlipka 0:7c9aa931c67c 522 /**
hlipka 0:7c9aa931c67c 523 * Number of records to discard
hlipka 0:7c9aa931c67c 524 *
hlipka 0:7c9aa931c67c 525 * This variable controls how many records in the file are skipped before parsing begins.
hlipka 0:7c9aa931c67c 526 *
hlipka 0:7c9aa931c67c 527 * @see csv_parser::_skip_lines()
hlipka 0:7c9aa931c67c 528 * @see csv_parser::set_skip_lines()
hlipka 0:7c9aa931c67c 529 *
hlipka 0:7c9aa931c67c 530 * @var ignore_num_lines
hlipka 0:7c9aa931c67c 531 */
hlipka 0:7c9aa931c67c 532 unsigned int ignore_num_lines;
hlipka 0:7c9aa931c67c 533
hlipka 0:7c9aa931c67c 534 /**
hlipka 0:7c9aa931c67c 535 * Number of times the get_row() method has been called
hlipka 0:7c9aa931c67c 536 *
hlipka 0:7c9aa931c67c 537 * @see csv_parser::get_row()
hlipka 0:7c9aa931c67c 538 * @var record_count
hlipka 0:7c9aa931c67c 539 */
hlipka 0:7c9aa931c67c 540 unsigned int record_count;
hlipka 0:7c9aa931c67c 541
hlipka 0:7c9aa931c67c 542 /**
hlipka 0:7c9aa931c67c 543 * The CSV File Pointer
hlipka 0:7c9aa931c67c 544 *
hlipka 0:7c9aa931c67c 545 * This is the pointer to the CSV file
hlipka 0:7c9aa931c67c 546 *
hlipka 0:7c9aa931c67c 547 * @var input_fp
hlipka 0:7c9aa931c67c 548 */
hlipka 0:7c9aa931c67c 549 FILE * input_fp;
hlipka 0:7c9aa931c67c 550
hlipka 0:7c9aa931c67c 551 /**
hlipka 0:7c9aa931c67c 552 * Buffer to input file name
hlipka 0:7c9aa931c67c 553 *
hlipka 0:7c9aa931c67c 554 * This buffer is used to store the name of the file that is being parsed
hlipka 0:7c9aa931c67c 555 *
hlipka 0:7c9aa931c67c 556 * @var input_filename
hlipka 0:7c9aa931c67c 557 */
hlipka 0:7c9aa931c67c 558 char * input_filename;
hlipka 0:7c9aa931c67c 559
hlipka 0:7c9aa931c67c 560 /**
hlipka 0:7c9aa931c67c 561 * Mode in which the CSV file will be parsed
hlipka 0:7c9aa931c67c 562 *
hlipka 0:7c9aa931c67c 563 * The various values are explained below
hlipka 0:7c9aa931c67c 564 *
hlipka 0:7c9aa931c67c 565 * @li ENCLOSURE_NONE (1) means the CSV file does not use any enclosure characters for the fields
hlipka 0:7c9aa931c67c 566 * @li ENCLOSURE_REQUIRED (2) means the CSV file requires enclosure characters for all the fields
hlipka 0:7c9aa931c67c 567 * @li ENCLOSURE_OPTIONAL (3) means the use of enclosure characters for the fields is optional
hlipka 0:7c9aa931c67c 568 *
hlipka 0:7c9aa931c67c 569 * @see csv_parser::get_row()
hlipka 0:7c9aa931c67c 570 * @see csv_parser::_read_single_line()
hlipka 0:7c9aa931c67c 571 * @see csv_parser::_get_fields_without_enclosure()
hlipka 0:7c9aa931c67c 572 * @see csv_parser::_get_fields_with_enclosure()
hlipka 0:7c9aa931c67c 573 * @see csv_parser::_get_fields_with_optional_enclosure()
hlipka 0:7c9aa931c67c 574 *
hlipka 0:7c9aa931c67c 575 * @var enclosure_type
hlipka 0:7c9aa931c67c 576 */
hlipka 0:7c9aa931c67c 577 enclosure_type_t enclosure_type;
hlipka 0:7c9aa931c67c 578
hlipka 0:7c9aa931c67c 579 /**
hlipka 0:7c9aa931c67c 580 * There are still more records to parse
hlipka 0:7c9aa931c67c 581 *
hlipka 0:7c9aa931c67c 582 * This boolean property is an internal indicator of whether there are still records in the
hlipka 0:7c9aa931c67c 583 * file to be parsed.
hlipka 0:7c9aa931c67c 584 *
hlipka 0:7c9aa931c67c 585 * @see csv_parser::has_more_rows()
hlipka 0:7c9aa931c67c 586 * @var more_rows
hlipka 0:7c9aa931c67c 587 */
hlipka 0:7c9aa931c67c 588 bool more_rows;
hlipka 0:7c9aa931c67c 589
hlipka 0:7c9aa931c67c 590 }; /* class csv_parser */
hlipka 0:7c9aa931c67c 591
hlipka 0:7c9aa931c67c 592 #endif /* CSV_PARSER_HPP_INCLUDED */