42 #ifndef CSV_IO_NO_THREAD
45 #include <condition_variable>
58 struct base : std::exception{
59 virtual void format_error_message()
const = 0;
61 const char*what()
const noexcept
override{
62 format_error_message();
63 return error_message_buffer;
66 mutable char error_message_buffer[512];
69 const int max_file_name_length = 255;
73 std::memset(file_name, 0,
sizeof(file_name));
76 void set_file_name(
const char*file_name){
77 if(file_name !=
nullptr){
80 (strncpy(this->file_name, file_name,
sizeof(this->file_name)));
81 this->file_name[
sizeof(this->file_name)-1] =
'\0';
83 this->file_name[0] =
'\0';
87 char file_name[max_file_name_length+1];
95 void set_file_line(
int file_line){
96 this->file_line = file_line;
107 void set_errno(
int errno_value){
108 this->errno_value = errno_value;
118 void format_error_message()
const override{
120 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
121 "Can not open file \"%s\" because \"%s\"."
122 , file_name, std::strerror(errno_value));
124 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
125 "Can not open file \"%s\"."
134 void format_error_message()
const override{
135 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
136 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
137 , file_line, file_name);
144 virtual int read(
char*buffer,
int size)=0;
154 std::setvbuf(file, 0, _IONBF, 0);
157 int read(
char*buffer,
int size){
158 return std::fread(buffer, 1, size, file);
173 int read(
char*buffer,
int size){
174 in.read(buffer, size);
188 int read(
char*buffer,
int desired_byte_count){
189 int to_copy_byte_count = desired_byte_count;
190 if(remaining_byte_count < to_copy_byte_count)
191 to_copy_byte_count = remaining_byte_count;
192 std::memcpy(buffer, str, to_copy_byte_count);
193 remaining_byte_count -= to_copy_byte_count;
194 str += to_copy_byte_count;
195 return to_copy_byte_count;
202 long long remaining_byte_count;
205 #ifndef CSV_IO_NO_THREAD
208 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
209 std::unique_lock<std::mutex>guard(lock);
210 byte_source = std::move(arg_byte_source);
211 desired_byte_count = -1;
212 termination_requested =
false;
213 worker = std::thread(
215 std::unique_lock<std::mutex>guard(lock);
218 read_requested_condition.wait(
221 return desired_byte_count != -1 || termination_requested;
224 if(termination_requested)
227 read_byte_count = byte_source->read(buffer, desired_byte_count);
228 desired_byte_count = -1;
229 if(read_byte_count == 0)
231 read_finished_condition.notify_one();
234 read_error = std::current_exception();
236 read_finished_condition.notify_one();
241 bool is_valid()
const{
242 return byte_source !=
nullptr;
245 void start_read(
char*arg_buffer,
int arg_desired_byte_count){
246 std::unique_lock<std::mutex>guard(lock);
248 desired_byte_count = arg_desired_byte_count;
249 read_byte_count = -1;
250 read_requested_condition.notify_one();
254 std::unique_lock<std::mutex>guard(lock);
255 read_finished_condition.wait(
258 return read_byte_count != -1 || read_error;
262 std::rethrow_exception(read_error);
264 return read_byte_count;
268 if(byte_source !=
nullptr){
270 std::unique_lock<std::mutex>guard(lock);
271 termination_requested =
true;
273 read_requested_condition.notify_one();
279 std::unique_ptr<ByteSourceBase>byte_source;
283 bool termination_requested;
284 std::exception_ptr read_error;
286 int desired_byte_count;
290 std::condition_variable read_finished_condition;
291 std::condition_variable read_requested_condition;
297 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
298 byte_source = std::move(arg_byte_source);
301 bool is_valid()
const{
302 return byte_source !=
nullptr;
305 void start_read(
char*arg_buffer,
int arg_desired_byte_count){
307 desired_byte_count = arg_desired_byte_count;
311 return byte_source->read(buffer, desired_byte_count);
314 std::unique_ptr<ByteSourceBase>byte_source;
316 int desired_byte_count;
322 static const int block_len = 1<<20;
323 std::unique_ptr<char[]>buffer;
324 #ifdef CSV_IO_NO_THREAD
332 char file_name[error::max_file_name_length+1];
335 static std::unique_ptr<ByteSourceBase> open_file(
const char*file_name){
338 FILE*file = std::fopen(file_name,
"rb");
343 err.set_file_name(file_name);
349 void init(std::unique_ptr<ByteSourceBase>byte_source){
352 buffer = std::unique_ptr<char[]>(
new char[3*block_len]);
354 data_end = byte_source->read(buffer.get(), 2*block_len);
357 if(data_end >= 3 && buffer[0] ==
'\xEF' && buffer[1] ==
'\xBB' && buffer[2] ==
'\xBF')
360 if(data_end == 2*block_len){
361 reader.init(std::move(byte_source));
362 reader.start_read(buffer.get() + 2*block_len, block_len);
372 set_file_name(file_name);
373 init(open_file(file_name));
376 explicit LineReader(
const std::string&file_name){
377 set_file_name(file_name.c_str());
378 init(open_file(file_name.c_str()));
381 LineReader(
const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
382 set_file_name(file_name);
383 init(std::move(byte_source));
386 LineReader(
const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
387 set_file_name(file_name.c_str());
388 init(std::move(byte_source));
391 LineReader(
const char*file_name,
const char*data_begin,
const char*data_end){
392 set_file_name(file_name);
396 LineReader(
const std::string&file_name,
const char*data_begin,
const char*data_end){
397 set_file_name(file_name.c_str());
402 set_file_name(file_name);
406 LineReader(
const std::string&file_name, FILE*file){
407 set_file_name(file_name.c_str());
411 LineReader(
const char*file_name, std::istream&in){
412 set_file_name(file_name);
416 LineReader(
const std::string&file_name, std::istream&in){
417 set_file_name(file_name.c_str());
421 void set_file_name(
const std::string&file_name){
422 set_file_name(file_name.c_str());
425 void set_file_name(
const char*file_name){
426 if(file_name !=
nullptr){
427 strncpy(this->file_name, file_name,
sizeof(this->file_name));
428 this->file_name[
sizeof(this->file_name)-1] =
'\0';
430 this->file_name[0] =
'\0';
434 const char*get_truncated_file_name()
const{
438 void set_file_line(
unsigned file_line){
439 this->file_line = file_line;
442 unsigned get_file_line()
const{
447 if(data_begin == data_end)
452 assert(data_begin < data_end);
453 assert(data_end <= block_len*2);
455 if(data_begin >= block_len){
456 std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
457 data_begin -= block_len;
458 data_end -= block_len;
459 if(reader.is_valid())
461 data_end += reader.finish_read();
462 std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
463 reader.start_read(buffer.get() + 2*block_len, block_len);
467 int line_end = data_begin;
468 while(line_end != data_end && buffer[line_end] !=
'\n'){
472 if(line_end - data_begin + 1 > block_len){
474 err.set_file_name(file_name);
475 err.set_file_line(file_line);
479 if(line_end != data_end && buffer[line_end] ==
'\n'){
480 buffer[line_end] =
'\0';
485 buffer[line_end] =
'\0';
489 if(line_end != data_begin && buffer[line_end-1] ==
'\r')
490 buffer[line_end-1] =
'\0';
492 char*ret = buffer.get() + data_begin;
493 data_begin = line_end+1;
504 const int max_column_name_length = 63;
507 std::memset(column_name, 0, max_column_name_length+1);
510 void set_column_name(
const char*column_name){
511 if(column_name !=
nullptr){
512 std::strncpy(this->column_name, column_name, max_column_name_length);
513 this->column_name[max_column_name_length] =
'\0';
515 this->column_name[0] =
'\0';
519 char column_name[max_column_name_length+1];
523 const int max_column_content_length = 63;
527 std::memset(column_content, 0, max_column_content_length+1);
530 void set_column_content(
const char*column_content){
531 if(column_content !=
nullptr){
532 std::strncpy(this->column_content, column_content, max_column_content_length);
533 this->column_content[max_column_content_length] =
'\0';
535 this->column_content[0] =
'\0';
539 char column_content[max_column_content_length+1];
547 void format_error_message()
const override{
548 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
549 R
"(Extra column "%s" in header of file "%s".)"
550 , column_name, file_name);
558 void format_error_message()
const override{
559 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
560 R
"(Missing column "%s" in header of file "%s".)"
561 , column_name, file_name);
569 void format_error_message()
const override{
570 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
571 R
"(Duplicated column "%s" in header of file "%s".)"
572 , column_name, file_name);
579 void format_error_message()
const override{
580 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
581 "Header missing in file \"%s\"."
590 void format_error_message()
const override{
591 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
592 "Too few columns in line %d in file \"%s\"."
593 , file_line, file_name);
601 void format_error_message()
const override{
602 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
603 "Too many columns in line %d in file \"%s\"."
604 , file_line, file_name);
612 void format_error_message()
const override{
613 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
614 "Escaped string was not closed in line %d in file \"%s\"."
615 , file_line, file_name);
625 void format_error_message()
const override{
626 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
627 R
"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)"
628 , column_content, column_name, file_name, file_line);
638 void format_error_message()
const override{
639 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
640 R
"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)"
641 , column_content, column_name, file_name, file_line);
651 void format_error_message()
const override{
652 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
653 R
"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)"
654 , column_content, column_name, file_name, file_line);
664 void format_error_message()
const override{
665 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
666 R
"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)"
667 , column_content, column_name, file_name, file_line);
677 void format_error_message()
const override{
678 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
679 R
"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)"
680 , column_content, column_name, file_name, file_line);
/// Bit flags telling header parsing which column mismatches to tolerate.
/// The values are combined and tested with bitwise operators.
using ignore_column = unsigned int;
static const ignore_column ignore_no_column      = 0u;       // tolerate nothing
static const ignore_column ignore_extra_column   = 1u << 0;  // file has columns nobody asked for
static const ignore_column ignore_missing_column = 1u << 1;  // requested columns absent from file
690 template<
char ... trim_char_list>
693 constexpr
static bool is_trim_char(
char){
697 template<
class ...OtherTrimChars>
698 constexpr
static bool is_trim_char(
char c,
char trim_char, OtherTrimChars...other_trim_chars){
699 return c == trim_char || is_trim_char(c, other_trim_chars...);
703 static void trim(
char*&str_begin,
char*&str_end){
704 while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
706 while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...))
714 static bool is_comment(
const char*){
719 template<
char ... comment_start_char_list>
722 constexpr
static bool is_comment_start_char(
char){
726 template<
class ...OtherCommentStartChars>
727 constexpr
static bool is_comment_start_char(
char c,
char comment_start_char, OtherCommentStartChars...other_comment_start_chars){
728 return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...);
733 static bool is_comment(
const char*line){
734 return is_comment_start_char(*line, comment_start_char_list...);
739 static bool is_comment(
const char*line){
742 while(*line ==
' ' || *line ==
'\t'){
751 template<
char ... comment_start_char_list>
753 static bool is_comment(
const char*line){
760 static const char*find_next_column_end(
const char*col_begin){
761 while(*col_begin != sep && *col_begin !=
'\0')
766 static void unescape(
char*&,
char*&){
771 template<
char sep,
char quote>
773 static const char*find_next_column_end(
const char*col_begin){
774 while(*col_begin != sep && *col_begin !=
'\0')
775 if(*col_begin != quote)
780 while(*col_begin != quote){
781 if(*col_begin ==
'\0')
786 }
while(*col_begin == quote);
791 static void unescape(
char*&col_begin,
char*&col_end){
792 if(col_end - col_begin >= 2){
793 if(*col_begin == quote && *(col_end-1) == quote){
796 char*out = col_begin;
797 for(
char*in = col_begin; in!=col_end; ++in){
798 if(*in == quote && (in+1) != col_end && *(in+1) == quote){
814 static void on_overflow(T&){
819 static void on_underflow(T&){
826 static void on_overflow(T&){}
829 static void on_underflow(T&){}
834 static void on_overflow(T&x){
837 x = (std::numeric_limits<T>::max)();
841 static void on_underflow(T&x){
842 x = (std::numeric_limits<T>::min)();
848 template<
class quote_policy>
849 void chop_next_column(
850 char*&line,
char*&col_begin,
char*&col_end
852 assert(line !=
nullptr);
856 col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin);
858 if(*col_end ==
'\0'){
866 template<
class trim_policy,
class quote_policy>
870 const std::vector<int>&col_order
872 for (
int i : col_order) {
874 throw ::io::error::too_few_columns();
875 char*col_begin, *col_end;
876 chop_next_column<quote_policy>(line, col_begin, col_end);
879 trim_policy::trim(col_begin, col_end);
880 quote_policy::unescape(col_begin, col_end);
882 sorted_col[i] = col_begin;
886 throw ::io::error::too_many_columns();
889 template<
unsigned column_count,
class trim_policy,
class quote_policy>
890 void parse_header_line(
892 std::vector<int>&col_order,
893 const std::string*col_name,
894 ignore_column ignore_policy
898 bool found[column_count];
899 std::fill(found, found + column_count,
false);
901 char*col_begin,*col_end;
902 chop_next_column<quote_policy>(line, col_begin, col_end);
904 trim_policy::trim(col_begin, col_end);
905 quote_policy::unescape(col_begin, col_end);
907 for(
unsigned i=0; i<column_count; ++i)
908 if(col_begin == col_name[i]){
910 error::duplicated_column_in_header err;
911 err.set_column_name(col_begin);
915 col_order.push_back(i);
920 if(ignore_policy & ::io::ignore_extra_column)
921 col_order.push_back(-1);
923 error::extra_column_in_header err;
924 err.set_column_name(col_begin);
929 if(!(ignore_policy & ::io::ignore_missing_column)){
930 for(
unsigned i=0; i<column_count; ++i){
932 error::missing_column_in_header err;
933 err.set_column_name(col_name[i].c_str());
940 template<
class overflow_policy>
941 void parse(
char*col,
char &x){
943 throw error::invalid_single_character();
947 throw error::invalid_single_character();
950 template<
class overflow_policy>
951 void parse(
char*col, std::string&x){
955 template<
class overflow_policy>
956 void parse(
char*col,
const char*&x){
960 template<
class overflow_policy>
961 void parse(
char*col,
char*&x){
965 template<
class overflow_policy,
class T>
966 void parse_unsigned_integer(
const char*col, T&x){
969 if(
'0' <= *col && *col <=
'9'){
971 if(x > ((std::numeric_limits<T>::max)()-y)/10){
972 overflow_policy::on_overflow(x);
977 throw error::no_digit();
982 template<
class overflow_policy>
void parse(
char*col,
unsigned char &x)
983 {parse_unsigned_integer<overflow_policy>(col, x);}
984 template<
class overflow_policy>
void parse(
char*col,
unsigned short &x)
985 {parse_unsigned_integer<overflow_policy>(col, x);}
986 template<
class overflow_policy>
void parse(
char*col,
unsigned int &x)
987 {parse_unsigned_integer<overflow_policy>(col, x);}
988 template<
class overflow_policy>
void parse(
char*col,
unsigned long &x)
989 {parse_unsigned_integer<overflow_policy>(col, x);}
990 template<
class overflow_policy>
void parse(
char*col,
unsigned long long &x)
991 {parse_unsigned_integer<overflow_policy>(col, x);}
993 template<
class overflow_policy,
class T>
994 void parse_signed_integer(
const char*col, T&x){
1000 if(
'0' <= *col && *col <=
'9'){
1002 if(x < ((std::numeric_limits<T>::min)()+y)/10){
1003 overflow_policy::on_underflow(x);
1008 throw error::no_digit();
1012 }
else if(*col ==
'+')
1014 parse_unsigned_integer<overflow_policy>(col, x);
1017 template<
class overflow_policy>
void parse(
char*col,
signed char &x)
1018 {parse_signed_integer<overflow_policy>(col, x);}
1019 template<
class overflow_policy>
void parse(
char*col,
signed short &x)
1020 {parse_signed_integer<overflow_policy>(col, x);}
1021 template<
class overflow_policy>
void parse(
char*col,
signed int &x)
1022 {parse_signed_integer<overflow_policy>(col, x);}
1023 template<
class overflow_policy>
void parse(
char*col,
signed long &x)
1024 {parse_signed_integer<overflow_policy>(col, x);}
1025 template<
class overflow_policy>
void parse(
char*col,
signed long long &x)
1026 {parse_signed_integer<overflow_policy>(col, x);}
1029 void parse_float(
const char*col, T&x){
1030 bool is_neg =
false;
1034 }
else if(*col ==
'+')
1038 while(
'0' <= *col && *col <=
'9'){
1045 if(*col ==
'.'|| *col ==
','){
1048 while(
'0' <= *col && *col <=
'9'){
1056 if(*col ==
'e' || *col ==
'E'){
1060 parse_signed_integer<set_to_max_on_overflow>(col, e);
1084 throw error::no_digit();
1091 template<
class overflow_policy>
void parse(
char*col,
float&x) { parse_float(col, x); }
1092 template<
class overflow_policy>
void parse(
char*col,
double&x) { parse_float(col, x); }
1093 template<
class overflow_policy>
void parse(
char*col,
long double&x) { parse_float(col, x); }
1095 template<
class overflow_policy,
class T>
1096 void parse(
char*col, T&x){
1103 static_assert(
sizeof(T)!=
sizeof(T),
1104 "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
1109 template<
unsigned column_count,
1110 class trim_policy = trim_chars<' ', '\t'>,
1111 class quote_policy = no_quote_escape<','>,
1112 class overflow_policy = throw_on_overflow,
1113 class comment_policy = no_comment
1119 char*row[column_count];
1120 std::string column_names[column_count];
1122 std::vector<int>col_order;
1124 template<
class ...ColNames>
1125 void set_column_names(std::string s, ColNames...cols){
1126 column_names[column_count-
sizeof...(ColNames)-1] = std::move(s);
1127 set_column_names(std::forward<ColNames>(cols)...);
/// Recursion terminator: every name has been stored.
void set_column_names(){
}
1138 template<
class ...Args>
1139 explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
1140 std::fill(row, row+column_count,
nullptr);
1141 col_order.resize(column_count);
1142 for(
unsigned i=0; i<column_count; ++i)
1144 for(
unsigned i=1; i<=column_count; ++i)
1145 column_names[i-1] =
"col"+std::to_string(i);
1149 return in.next_line();
1152 template<
class ...ColNames>
1153 void read_header(ignore_column ignore_policy, ColNames...cols){
1154 static_assert(
sizeof...(ColNames)>=column_count,
"not enough column names specified");
1155 static_assert(
sizeof...(ColNames)<=column_count,
"too many column names specified");
1157 set_column_names(std::forward<ColNames>(cols)...);
1161 line = in.next_line();
1164 }
while(comment_policy::is_comment(line));
1166 detail::parse_header_line
1167 <column_count, trim_policy, quote_policy>
1168 (line, col_order, column_names, ignore_policy);
1170 err.set_file_name(in.get_truncated_file_name());
1175 template<
class ...ColNames>
1176 void set_header(ColNames...cols){
1177 static_assert(
sizeof...(ColNames)>=column_count,
1178 "not enough column names specified");
1179 static_assert(
sizeof...(ColNames)<=column_count,
1180 "too many column names specified");
1181 set_column_names(std::forward<ColNames>(cols)...);
1182 std::fill(row, row+column_count,
nullptr);
1183 col_order.resize(column_count);
1184 for(
unsigned i=0; i<column_count; ++i)
1188 bool has_column(
const std::string&name)
const {
1189 return col_order.end() != std::find(
1190 col_order.begin(), col_order.end(),
1191 std::find(std::begin(column_names), std::end(column_names), name)
1192 - std::begin(column_names));
1195 void set_file_name(
const std::string&file_name){
1196 in.set_file_name(file_name);
1199 void set_file_name(
const char*file_name){
1200 in.set_file_name(file_name);
1203 const char*get_truncated_file_name()
const{
1204 return in.get_truncated_file_name();
1207 void set_file_line(
unsigned file_line){
1208 in.set_file_line(file_line);
1211 unsigned get_file_line()
const{
1212 return in.get_file_line();
/// Recursion terminator: no output columns are left to parse.
void parse_helper(std::size_t /*r*/){
}
1218 template<
class T,
class ...ColType>
1219 void parse_helper(std::size_t r, T&t, ColType&...cols){
1223 ::io::detail::parse<overflow_policy>(row[r], t);
1225 err.set_column_content(row[r]);
1229 err.set_column_name(column_names[r].c_str());
1233 parse_helper(r+1, cols...);
1238 template<
class ...ColType>
1239 bool read_row(ColType& ...cols){
1240 static_assert(
sizeof...(ColType)>=column_count,
1241 "not enough columns specified");
1242 static_assert(
sizeof...(ColType)<=column_count,
1243 "too many columns specified");
1249 line = in.next_line();
1252 }
while(comment_policy::is_comment(line));
1254 detail::parse_line<trim_policy, quote_policy>
1255 (line, row, col_order);
1257 parse_helper(0, cols...);
1259 err.set_file_name(in.get_truncated_file_name());
1263 err.set_file_line(in.get_file_line());