YPC  0.2.0
csv.h
1 // Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
2 // License: BSD-3
3 //
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are met:
8 //
9 // 1. Redistributions of source code must retain the above copyright notice,
10 // this list of conditions and the following disclaimer.
11 //
12 // 2. Redistributions in binary form must reproduce the above copyright notice,
13 // this list of conditions and the following disclaimer in the documentation
14 // and/or other materials provided with the distribution.
15 //
16 // 3. Neither the name of the copyright holder nor the names of its contributors
17 // may be used to endorse or promote products derived from this software
18 // without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 // POSSIBILITY OF SUCH DAMAGE.
31 
32 #ifndef CSV_H
33 #define CSV_H
34 
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include <utility>
#include <cstdio>
#include <exception>
#ifndef CSV_IO_NO_THREAD
#include <mutex>
#include <thread>
#include <condition_variable>
#endif
#include <memory>
#include <cassert>
#include <cerrno>
#include <istream>
#include <limits>
51 
52 namespace io{
54  // LineReader //
56 
namespace error{
    /// Common base class of the exceptions defined in this header.
    /// The message is formatted lazily: what() asks the concrete error to
    /// fill error_message_buffer and then returns that buffer.
    struct base : std::exception{
        /// Writes the human readable message into error_message_buffer.
        virtual void format_error_message()const = 0;

        const char*what()const noexcept override{
            format_error_message();
            return error_message_buffer;
        }

        // mutable because what() is const but has to fill the buffer.
        mutable char error_message_buffer[512];
    };

    const int max_file_name_length = 255;

    /// Mixin that stores a (possibly truncated) file name for the message.
    struct with_file_name{
        with_file_name(){
            std::memset(file_name, 0, sizeof(file_name));
        }

        /// Copies at most max_file_name_length characters of file_name;
        /// a null pointer yields an empty name.
        void set_file_name(const char*file_name){
            if(file_name != nullptr){
                // This call to strncpy has parenthesis around it
                // to silence the GCC -Wstringop-truncation warning
                (std::strncpy(this->file_name, file_name, sizeof(this->file_name)));
                // strncpy does not terminate the destination on truncation.
                this->file_name[sizeof(this->file_name)-1] = '\0';
            }else{
                this->file_name[0] = '\0';
            }
        }

        char file_name[max_file_name_length+1];
    };

    /// Mixin that stores a line number for the message; -1 until set.
    struct with_file_line{
        with_file_line(){
            file_line = -1;
        }

        void set_file_line(int file_line){
            this->file_line = file_line;
        }

        int file_line;
    };

    /// Mixin that stores an errno value for the message; 0 until set.
    struct with_errno{
        with_errno(){
            errno_value = 0;
        }

        void set_errno(int errno_value){
            this->errno_value = errno_value;
        }

        int errno_value;
    };

    /// Reports that a file could not be opened. The errno text is appended
    /// only when an errno value other than 0 was recorded.
    struct can_not_open_file :
        base,
        with_file_name,
        with_errno{
        void format_error_message()const override{
            if(errno_value != 0)
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Can not open file \"%s\" because \"%s\"."
                    , file_name, std::strerror(errno_value));
            else
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Can not open file \"%s\"."
                    , file_name);
        }
    };

    /// Reports a line that is longer than the reader supports.
    // NOTE(review): LineReader::next_line raises this when a line exceeds its
    // block_len of 1<<20, so the "2^24-1" in the message looks stale - confirm
    // before changing the user-visible text.
    struct line_length_limit_exceeded :
        base,
        with_file_name,
        with_file_line{
        void format_error_message()const override{
            std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
                , file_line, file_name);
        }
    };
}
141 
/// Abstract source of raw bytes that the line reader pulls its data from.
class ByteSourceBase{
public:
    /// Stores up to `size` bytes in `buffer` and returns how many were stored.
    /// The asynchronous reader in this file stops reading once this returns 0.
    virtual int read(char*buffer, int size)=0;
    virtual ~ByteSourceBase(){}
};
147 
148  namespace detail{
149 
151  public:
152  explicit OwningStdIOByteSourceBase(FILE*file):file(file){
153  // Tell the std library that we want to do the buffering ourself.
154  std::setvbuf(file, 0, _IONBF, 0);
155  }
156 
157  int read(char*buffer, int size){
158  return std::fread(buffer, 1, size, file);
159  }
160 
162  std::fclose(file);
163  }
164 
165  private:
166  FILE*file;
167  };
168 
170  public:
171  explicit NonOwningIStreamByteSource(std::istream&in):in(in){}
172 
173  int read(char*buffer, int size){
174  in.read(buffer, size);
175  return in.gcount();
176  }
177 
179 
180  private:
181  std::istream&in;
182  };
183 
185  public:
186  NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}
187 
188  int read(char*buffer, int desired_byte_count){
189  int to_copy_byte_count = desired_byte_count;
190  if(remaining_byte_count < to_copy_byte_count)
191  to_copy_byte_count = remaining_byte_count;
192  std::memcpy(buffer, str, to_copy_byte_count);
193  remaining_byte_count -= to_copy_byte_count;
194  str += to_copy_byte_count;
195  return to_copy_byte_count;
196  }
197 
199 
200  private:
201  const char*str;
202  long long remaining_byte_count;
203  };
204 
205  #ifndef CSV_IO_NO_THREAD
207  public:
208  void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
209  std::unique_lock<std::mutex>guard(lock);
210  byte_source = std::move(arg_byte_source);
211  desired_byte_count = -1;
212  termination_requested = false;
213  worker = std::thread(
214  [&]{
215  std::unique_lock<std::mutex>guard(lock);
216  try{
217  for(;;){
218  read_requested_condition.wait(
219  guard,
220  [&]{
221  return desired_byte_count != -1 || termination_requested;
222  }
223  );
224  if(termination_requested)
225  return;
226 
227  read_byte_count = byte_source->read(buffer, desired_byte_count);
228  desired_byte_count = -1;
229  if(read_byte_count == 0)
230  break;
231  read_finished_condition.notify_one();
232  }
233  }catch(...){
234  read_error = std::current_exception();
235  }
236  read_finished_condition.notify_one();
237  }
238  );
239  }
240 
241  bool is_valid()const{
242  return byte_source != nullptr;
243  }
244 
245  void start_read(char*arg_buffer, int arg_desired_byte_count){
246  std::unique_lock<std::mutex>guard(lock);
247  buffer = arg_buffer;
248  desired_byte_count = arg_desired_byte_count;
249  read_byte_count = -1;
250  read_requested_condition.notify_one();
251  }
252 
253  int finish_read(){
254  std::unique_lock<std::mutex>guard(lock);
255  read_finished_condition.wait(
256  guard,
257  [&]{
258  return read_byte_count != -1 || read_error;
259  }
260  );
261  if(read_error)
262  std::rethrow_exception(read_error);
263  else
264  return read_byte_count;
265  }
266 
268  if(byte_source != nullptr){
269  {
270  std::unique_lock<std::mutex>guard(lock);
271  termination_requested = true;
272  }
273  read_requested_condition.notify_one();
274  worker.join();
275  }
276  }
277 
278  private:
279  std::unique_ptr<ByteSourceBase>byte_source;
280 
281  std::thread worker;
282 
283  bool termination_requested;
284  std::exception_ptr read_error;
285  char*buffer;
286  int desired_byte_count;
287  int read_byte_count;
288 
289  std::mutex lock;
290  std::condition_variable read_finished_condition;
291  std::condition_variable read_requested_condition;
292  };
293  #endif
294 
296  public:
297  void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
298  byte_source = std::move(arg_byte_source);
299  }
300 
301  bool is_valid()const{
302  return byte_source != nullptr;
303  }
304 
305  void start_read(char*arg_buffer, int arg_desired_byte_count){
306  buffer = arg_buffer;
307  desired_byte_count = arg_desired_byte_count;
308  }
309 
310  int finish_read(){
311  return byte_source->read(buffer, desired_byte_count);
312  }
313  private:
314  std::unique_ptr<ByteSourceBase>byte_source;
315  char*buffer;
316  int desired_byte_count;
317  };
318  }
319 
320  class LineReader{
321  private:
322  static const int block_len = 1<<20;
323  std::unique_ptr<char[]>buffer; // must be constructed before (and thus destructed after) the reader!
324  #ifdef CSV_IO_NO_THREAD
326  #else
328  #endif
329  int data_begin;
330  int data_end;
331 
332  char file_name[error::max_file_name_length+1];
333  unsigned file_line;
334 
335  static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
336  // We open the file in binary mode as it makes no difference under *nix
337  // and under Windows we handle \r\n newlines ourself.
338  FILE*file = std::fopen(file_name, "rb");
339  if(file == 0){
340  int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
342  err.set_errno(x);
343  err.set_file_name(file_name);
344  throw err;
345  }
346  return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
347  }
348 
349  void init(std::unique_ptr<ByteSourceBase>byte_source){
350  file_line = 0;
351 
352  buffer = std::unique_ptr<char[]>(new char[3*block_len]);
353  data_begin = 0;
354  data_end = byte_source->read(buffer.get(), 2*block_len);
355 
356  // Ignore UTF-8 BOM
357  if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
358  data_begin = 3;
359 
360  if(data_end == 2*block_len){
361  reader.init(std::move(byte_source));
362  reader.start_read(buffer.get() + 2*block_len, block_len);
363  }
364  }
365 
366  public:
367  LineReader() = delete;
368  LineReader(const LineReader&) = delete;
369  LineReader&operator=(const LineReader&) = delete;
370 
371  explicit LineReader(const char*file_name){
372  set_file_name(file_name);
373  init(open_file(file_name));
374  }
375 
376  explicit LineReader(const std::string&file_name){
377  set_file_name(file_name.c_str());
378  init(open_file(file_name.c_str()));
379  }
380 
381  LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
382  set_file_name(file_name);
383  init(std::move(byte_source));
384  }
385 
386  LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
387  set_file_name(file_name.c_str());
388  init(std::move(byte_source));
389  }
390 
391  LineReader(const char*file_name, const char*data_begin, const char*data_end){
392  set_file_name(file_name);
393  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
394  }
395 
396  LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
397  set_file_name(file_name.c_str());
398  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
399  }
400 
401  LineReader(const char*file_name, FILE*file){
402  set_file_name(file_name);
403  init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
404  }
405 
406  LineReader(const std::string&file_name, FILE*file){
407  set_file_name(file_name.c_str());
408  init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
409  }
410 
411  LineReader(const char*file_name, std::istream&in){
412  set_file_name(file_name);
413  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
414  }
415 
416  LineReader(const std::string&file_name, std::istream&in){
417  set_file_name(file_name.c_str());
418  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
419  }
420 
421  void set_file_name(const std::string&file_name){
422  set_file_name(file_name.c_str());
423  }
424 
425  void set_file_name(const char*file_name){
426  if(file_name != nullptr){
427  strncpy(this->file_name, file_name, sizeof(this->file_name));
428  this->file_name[sizeof(this->file_name)-1] = '\0';
429  }else{
430  this->file_name[0] = '\0';
431  }
432  }
433 
434  const char*get_truncated_file_name()const{
435  return file_name;
436  }
437 
438  void set_file_line(unsigned file_line){
439  this->file_line = file_line;
440  }
441 
442  unsigned get_file_line()const{
443  return file_line;
444  }
445 
446  char*next_line(){
447  if(data_begin == data_end)
448  return nullptr;
449 
450  ++file_line;
451 
452  assert(data_begin < data_end);
453  assert(data_end <= block_len*2);
454 
455  if(data_begin >= block_len){
456  std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
457  data_begin -= block_len;
458  data_end -= block_len;
459  if(reader.is_valid())
460  {
461  data_end += reader.finish_read();
462  std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
463  reader.start_read(buffer.get() + 2*block_len, block_len);
464  }
465  }
466 
467  int line_end = data_begin;
468  while(line_end != data_end && buffer[line_end] != '\n'){
469  ++line_end;
470  }
471 
472  if(line_end - data_begin + 1 > block_len){
474  err.set_file_name(file_name);
475  err.set_file_line(file_line);
476  throw err;
477  }
478 
479  if(line_end != data_end && buffer[line_end] == '\n'){
480  buffer[line_end] = '\0';
481  }else{
482  // some files are missing the newline at the end of the
483  // last line
484  ++data_end;
485  buffer[line_end] = '\0';
486  }
487 
488  // handle windows \r\n-line breaks
489  if(line_end != data_begin && buffer[line_end-1] == '\r')
490  buffer[line_end-1] = '\0';
491 
492  char*ret = buffer.get() + data_begin;
493  data_begin = line_end+1;
494  return ret;
495  }
496  };
497 
498 
500  // CSV //
502 
503  namespace error{
504  const int max_column_name_length = 63;
507  std::memset(column_name, 0, max_column_name_length+1);
508  }
509 
510  void set_column_name(const char*column_name){
511  if(column_name != nullptr){
512  std::strncpy(this->column_name, column_name, max_column_name_length);
513  this->column_name[max_column_name_length] = '\0';
514  }else{
515  this->column_name[0] = '\0';
516  }
517  }
518 
519  char column_name[max_column_name_length+1];
520  };
521 
522 
523  const int max_column_content_length = 63;
524 
527  std::memset(column_content, 0, max_column_content_length+1);
528  }
529 
530  void set_column_content(const char*column_content){
531  if(column_content != nullptr){
532  std::strncpy(this->column_content, column_content, max_column_content_length);
533  this->column_content[max_column_content_length] = '\0';
534  }else{
535  this->column_content[0] = '\0';
536  }
537  }
538 
539  char column_content[max_column_content_length+1];
540  };
541 
542 
544  base,
547  void format_error_message()const override{
548  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
549  R"(Extra column "%s" in header of file "%s".)"
550  , column_name, file_name);
551  }
552  };
553 
555  base,
558  void format_error_message()const override{
559  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
560  R"(Missing column "%s" in header of file "%s".)"
561  , column_name, file_name);
562  }
563  };
564 
566  base,
569  void format_error_message()const override{
570  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
571  R"(Duplicated column "%s" in header of file "%s".)"
572  , column_name, file_name);
573  }
574  };
575 
576  struct header_missing :
577  base,
579  void format_error_message()const override{
580  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
581  "Header missing in file \"%s\"."
582  , file_name);
583  }
584  };
585 
587  base,
590  void format_error_message()const override{
591  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
592  "Too few columns in line %d in file \"%s\"."
593  , file_line, file_name);
594  }
595  };
596 
598  base,
601  void format_error_message()const override{
602  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
603  "Too many columns in line %d in file \"%s\"."
604  , file_line, file_name);
605  }
606  };
607 
609  base,
612  void format_error_message()const override{
613  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
614  "Escaped string was not closed in line %d in file \"%s\"."
615  , file_line, file_name);
616  }
617  };
618 
620  base,
625  void format_error_message()const override{
626  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
627  R"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)"
628  , column_content, column_name, file_name, file_line);
629  }
630  };
631 
632  struct no_digit :
633  base,
638  void format_error_message()const override{
639  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
640  R"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)"
641  , column_content, column_name, file_name, file_line);
642  }
643  };
644 
646  base,
651  void format_error_message()const override{
652  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
653  R"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)"
654  , column_content, column_name, file_name, file_line);
655  }
656  };
657 
659  base,
664  void format_error_message()const override{
665  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
666  R"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)"
667  , column_content, column_name, file_name, file_line);
668  }
669  };
670 
672  base,
677  void format_error_message()const override{
678  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
679  R"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)"
680  , column_content, column_name, file_name, file_line);
681  }
682  };
683  }
684 
/// Bit mask that tells header parsing which header mismatches to tolerate.
/// The values are distinct bits and may be combined with |.
using ignore_column = unsigned int;
static const ignore_column ignore_no_column = 0;
/// Header columns that were not requested are skipped instead of rejected.
static const ignore_column ignore_extra_column = 1;
/// Requested columns that are absent from the header are not an error.
static const ignore_column ignore_missing_column = 2;
689 
/// Trim policy: removes every leading and trailing character that is listed
/// in trim_char_list.
template<char ... trim_char_list>
struct trim_chars{
private:
    // End of the recursion: no candidate characters left.
    constexpr static bool is_trim_char(char){
        return false;
    }

    // True if c equals trim_char or any of the remaining candidates.
    template<class ...OtherTrimChars>
    constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){
        return c == trim_char || is_trim_char(c, other_trim_chars...);
    }

public:
    /// Narrows [str_begin, str_end) to the trimmed range and writes a '\0'
    /// at the new end, so *str_end must be writable.
    static void trim(char*&str_begin, char*&str_end){
        while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
            ++str_begin;
        while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...))
            --str_end;
        *str_end = '\0';
    }
};
711 
712 
/// Comment policy that treats no line as a comment.
struct no_comment{
    static bool is_comment(const char*){
        return false;
    }
};
718 
/// Comment policy: a line is a comment if its first character is one of
/// comment_start_char_list.
template<char ... comment_start_char_list>
struct single_line_comment{
private:
    // End of the recursion: no candidate characters left.
    constexpr static bool is_comment_start_char(char){
        return false;
    }

    // True if c equals comment_start_char or any of the remaining candidates.
    template<class ...OtherCommentStartChars>
    constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars...other_comment_start_chars){
        return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...);
    }

public:

    /// Only the first character of `line` is inspected.
    static bool is_comment(const char*line){
        return is_comment_start_char(*line, comment_start_char_list...);
    }
};
737 
/// Comment policy: a line is a comment if it is empty or consists only of
/// spaces and tabs.
struct empty_line_comment{
    static bool is_comment(const char*line){
        if(*line == '\0')
            return true;
        // Skip blanks; reaching the terminator means nothing else was found.
        while(*line == ' ' || *line == '\t'){
            ++line;
            if(*line == 0)
                return true;
        }
        return false;
    }
};
750 
751  template<char ... comment_start_char_list>
753  static bool is_comment(const char*line){
754  return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
755  }
756  };
757 
/// Quote policy without any quoting: columns are separated by `sep` and
/// cell content is taken literally.
template<char sep>
struct no_quote_escape{
    /// Returns a pointer to the next `sep` or to the terminating '\0',
    /// whichever comes first.
    static const char*find_next_column_end(const char*col_begin){
        while(*col_begin != sep && *col_begin != '\0')
            ++col_begin;
        return col_begin;
    }

    /// Nothing to unescape for this policy.
    static void unescape(char*&, char*&){

    }
};
770 
771  template<char sep, char quote>
773  static const char*find_next_column_end(const char*col_begin){
774  while(*col_begin != sep && *col_begin != '\0')
775  if(*col_begin != quote)
776  ++col_begin;
777  else{
778  do{
779  ++col_begin;
780  while(*col_begin != quote){
781  if(*col_begin == '\0')
783  ++col_begin;
784  }
785  ++col_begin;
786  }while(*col_begin == quote);
787  }
788  return col_begin;
789  }
790 
791  static void unescape(char*&col_begin, char*&col_end){
792  if(col_end - col_begin >= 2){
793  if(*col_begin == quote && *(col_end-1) == quote){
794  ++col_begin;
795  --col_end;
796  char*out = col_begin;
797  for(char*in = col_begin; in!=col_end; ++in){
798  if(*in == quote && (in+1) != col_end && *(in+1) == quote){
799  ++in;
800  }
801  *out = *in;
802  ++out;
803  }
804  col_end = out;
805  *col_end = '\0';
806  }
807  }
808 
809  }
810  };
811 
813  template<class T>
814  static void on_overflow(T&){
815  throw error::integer_overflow();
816  }
817 
818  template<class T>
819  static void on_underflow(T&){
820  throw error::integer_underflow();
821  }
822  };
823 
/// Overflow policy whose handlers do nothing: the value passed to them is
/// left as it is.
struct ignore_overflow{
    template<class T>
    static void on_overflow(T&){}

    template<class T>
    static void on_underflow(T&){}
};
831 
/// Overflow policy that saturates: on overflow the value becomes the largest
/// and on underflow the smallest value std::numeric_limits<T> reports.
struct set_to_max_on_overflow{
    template<class T>
    static void on_overflow(T&x){
        // using (std::numeric_limits<T>::max) instead of std::numeric_limits<T>::max
        // to make code including windows.h with its max macro happy
        x = (std::numeric_limits<T>::max)();
    }

    template<class T>
    static void on_underflow(T&x){
        x = (std::numeric_limits<T>::min)();
    }
};
845 
846 
847  namespace detail{
/// Cuts the first column off `line`.
/// On return [col_begin, col_end) is that column, terminated with '\0' in
/// place, and `line` points behind the separator or is nullptr if this was
/// the last column of the line.
template<class quote_policy>
void chop_next_column(
    char*&line, char*&col_begin, char*&col_end
){
    assert(line != nullptr);

    col_begin = line;
    // the col_begin + (... - col_begin) removes the constness
    col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin);

    if(*col_end == '\0'){
        line = nullptr;
    }else{
        // Overwrite the separator so that the column is a C string.
        *col_end = '\0';
        line = col_end + 1;
    }
}
865 
866  template<class trim_policy, class quote_policy>
867  void parse_line(
868  char*line,
869  char**sorted_col,
870  const std::vector<int>&col_order
871  ){
872  for (int i : col_order) {
873  if(line == nullptr)
874  throw ::io::error::too_few_columns();
875  char*col_begin, *col_end;
876  chop_next_column<quote_policy>(line, col_begin, col_end);
877 
878  if (i != -1) {
879  trim_policy::trim(col_begin, col_end);
880  quote_policy::unescape(col_begin, col_end);
881 
882  sorted_col[i] = col_begin;
883  }
884  }
885  if(line != nullptr)
886  throw ::io::error::too_many_columns();
887  }
888 
889  template<unsigned column_count, class trim_policy, class quote_policy>
890  void parse_header_line(
891  char*line,
892  std::vector<int>&col_order,
893  const std::string*col_name,
894  ignore_column ignore_policy
895  ){
896  col_order.clear();
897 
898  bool found[column_count];
899  std::fill(found, found + column_count, false);
900  while(line){
901  char*col_begin,*col_end;
902  chop_next_column<quote_policy>(line, col_begin, col_end);
903 
904  trim_policy::trim(col_begin, col_end);
905  quote_policy::unescape(col_begin, col_end);
906 
907  for(unsigned i=0; i<column_count; ++i)
908  if(col_begin == col_name[i]){
909  if(found[i]){
910  error::duplicated_column_in_header err;
911  err.set_column_name(col_begin);
912  throw err;
913  }
914  found[i] = true;
915  col_order.push_back(i);
916  col_begin = 0;
917  break;
918  }
919  if(col_begin){
920  if(ignore_policy & ::io::ignore_extra_column)
921  col_order.push_back(-1);
922  else{
923  error::extra_column_in_header err;
924  err.set_column_name(col_begin);
925  throw err;
926  }
927  }
928  }
929  if(!(ignore_policy & ::io::ignore_missing_column)){
930  for(unsigned i=0; i<column_count; ++i){
931  if(!found[i]){
932  error::missing_column_in_header err;
933  err.set_column_name(col_name[i].c_str());
934  throw err;
935  }
936  }
937  }
938  }
939 
940  template<class overflow_policy>
941  void parse(char*col, char &x){
942  if(!*col)
943  throw error::invalid_single_character();
944  x = *col;
945  ++col;
946  if(*col)
947  throw error::invalid_single_character();
948  }
949 
/// Copies the cell text into a std::string.
template<class overflow_policy>
void parse(char*col, std::string&x){
    x = col;
}

/// Stores a pointer to the cell text; no copy is made, so the pointer refers
/// to the memory `col` points into.
template<class overflow_policy>
void parse(char*col, const char*&x){
    x = col;
}

/// Mutable variant of the pointer overload; no copy is made either.
template<class overflow_policy>
void parse(char*col, char*&x){
    x = col;
}
964 
965  template<class overflow_policy, class T>
966  void parse_unsigned_integer(const char*col, T&x){
967  x = 0;
968  while(*col != '\0'){
969  if('0' <= *col && *col <= '9'){
970  T y = *col - '0';
971  if(x > ((std::numeric_limits<T>::max)()-y)/10){
972  overflow_policy::on_overflow(x);
973  return;
974  }
975  x = 10*x+y;
976  }else
977  throw error::no_digit();
978  ++col;
979  }
980  }
981 
982  template<class overflow_policy>void parse(char*col, unsigned char &x)
983  {parse_unsigned_integer<overflow_policy>(col, x);}
984  template<class overflow_policy>void parse(char*col, unsigned short &x)
985  {parse_unsigned_integer<overflow_policy>(col, x);}
986  template<class overflow_policy>void parse(char*col, unsigned int &x)
987  {parse_unsigned_integer<overflow_policy>(col, x);}
988  template<class overflow_policy>void parse(char*col, unsigned long &x)
989  {parse_unsigned_integer<overflow_policy>(col, x);}
990  template<class overflow_policy>void parse(char*col, unsigned long long &x)
991  {parse_unsigned_integer<overflow_policy>(col, x);}
992 
993  template<class overflow_policy, class T>
994  void parse_signed_integer(const char*col, T&x){
995  if(*col == '-'){
996  ++col;
997 
998  x = 0;
999  while(*col != '\0'){
1000  if('0' <= *col && *col <= '9'){
1001  T y = *col - '0';
1002  if(x < ((std::numeric_limits<T>::min)()+y)/10){
1003  overflow_policy::on_underflow(x);
1004  return;
1005  }
1006  x = 10*x-y;
1007  }else
1008  throw error::no_digit();
1009  ++col;
1010  }
1011  return;
1012  }else if(*col == '+')
1013  ++col;
1014  parse_unsigned_integer<overflow_policy>(col, x);
1015  }
1016 
1017  template<class overflow_policy>void parse(char*col, signed char &x)
1018  {parse_signed_integer<overflow_policy>(col, x);}
1019  template<class overflow_policy>void parse(char*col, signed short &x)
1020  {parse_signed_integer<overflow_policy>(col, x);}
1021  template<class overflow_policy>void parse(char*col, signed int &x)
1022  {parse_signed_integer<overflow_policy>(col, x);}
1023  template<class overflow_policy>void parse(char*col, signed long &x)
1024  {parse_signed_integer<overflow_policy>(col, x);}
1025  template<class overflow_policy>void parse(char*col, signed long long &x)
1026  {parse_signed_integer<overflow_policy>(col, x);}
1027 
1028  template<class T>
1029  void parse_float(const char*col, T&x){
1030  bool is_neg = false;
1031  if(*col == '-'){
1032  is_neg = true;
1033  ++col;
1034  }else if(*col == '+')
1035  ++col;
1036 
1037  x = 0;
1038  while('0' <= *col && *col <= '9'){
1039  int y = *col - '0';
1040  x *= 10;
1041  x += y;
1042  ++col;
1043  }
1044 
1045  if(*col == '.'|| *col == ','){
1046  ++col;
1047  T pos = 1;
1048  while('0' <= *col && *col <= '9'){
1049  pos /= 10;
1050  int y = *col - '0';
1051  ++col;
1052  x += y*pos;
1053  }
1054  }
1055 
1056  if(*col == 'e' || *col == 'E'){
1057  ++col;
1058  int e;
1059 
1060  parse_signed_integer<set_to_max_on_overflow>(col, e);
1061 
1062  if(e != 0){
1063  T base;
1064  if(e < 0){
1065  base = T(0.1);
1066  e = -e;
1067  }else{
1068  base = T(10);
1069  }
1070 
1071  while(e != 1){
1072  if((e & 1) == 0){
1073  base = base*base;
1074  e >>= 1;
1075  }else{
1076  x *= base;
1077  --e;
1078  }
1079  }
1080  x *= base;
1081  }
1082  }else{
1083  if(*col != '\0')
1084  throw error::no_digit();
1085  }
1086 
1087  if(is_neg)
1088  x = -x;
1089  }
1090 
1091  template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
1092  template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
1093  template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
1094 
/// Catch-all overload: selected for every type without a dedicated parse()
/// overload and turned into a compile-time error.
template<class overflow_policy, class T>
void parse(char*col, T&x){
    // Mute unused variable compiler warning
    (void)col;
    (void)x;
    // GCC evaluates "false" when reading the template and
    // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
    // this strange construct is used.
    static_assert(sizeof(T)!=sizeof(T),
        "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
}
1106 
1107  }
1108 
1109  template<unsigned column_count,
1110  class trim_policy = trim_chars<' ', '\t'>,
1111  class quote_policy = no_quote_escape<','>,
1112  class overflow_policy = throw_on_overflow,
1113  class comment_policy = no_comment
1114  >
1115  class CSVReader{
1116  private:
1117  LineReader in;
1118 
1119  char*row[column_count];
1120  std::string column_names[column_count];
1121 
1122  std::vector<int>col_order;
1123 
1124  template<class ...ColNames>
1125  void set_column_names(std::string s, ColNames...cols){
1126  column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
1127  set_column_names(std::forward<ColNames>(cols)...);
1128  }
1129 
1130  void set_column_names(){}
1131 
1132 
1133  public:
1134  CSVReader() = delete;
1135  CSVReader(const CSVReader&) = delete;
1136  CSVReader&operator=(const CSVReader&);
1137 
1138  template<class ...Args>
1139  explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
1140  std::fill(row, row+column_count, nullptr);
1141  col_order.resize(column_count);
1142  for(unsigned i=0; i<column_count; ++i)
1143  col_order[i] = i;
1144  for(unsigned i=1; i<=column_count; ++i)
1145  column_names[i-1] = "col"+std::to_string(i);
1146  }
1147 
1148  char*next_line(){
1149  return in.next_line();
1150  }
1151 
1152  template<class ...ColNames>
1153  void read_header(ignore_column ignore_policy, ColNames...cols){
1154  static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
1155  static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
1156  try{
1157  set_column_names(std::forward<ColNames>(cols)...);
1158 
1159  char*line;
1160  do{
1161  line = in.next_line();
1162  if(!line)
1163  throw error::header_missing();
1164  }while(comment_policy::is_comment(line));
1165 
1166  detail::parse_header_line
1167  <column_count, trim_policy, quote_policy>
1168  (line, col_order, column_names, ignore_policy);
1169  }catch(error::with_file_name&err){
1170  err.set_file_name(in.get_truncated_file_name());
1171  throw;
1172  }
1173  }
1174 
1175  template<class ...ColNames>
1176  void set_header(ColNames...cols){
1177  static_assert(sizeof...(ColNames)>=column_count,
1178  "not enough column names specified");
1179  static_assert(sizeof...(ColNames)<=column_count,
1180  "too many column names specified");
1181  set_column_names(std::forward<ColNames>(cols)...);
1182  std::fill(row, row+column_count, nullptr);
1183  col_order.resize(column_count);
1184  for(unsigned i=0; i<column_count; ++i)
1185  col_order[i] = i;
1186  }
1187 
1188  bool has_column(const std::string&name) const {
1189  return col_order.end() != std::find(
1190  col_order.begin(), col_order.end(),
1191  std::find(std::begin(column_names), std::end(column_names), name)
1192  - std::begin(column_names));
1193  }
1194 
1195  void set_file_name(const std::string&file_name){
1196  in.set_file_name(file_name);
1197  }
1198 
1199  void set_file_name(const char*file_name){
1200  in.set_file_name(file_name);
1201  }
1202 
1203  const char*get_truncated_file_name()const{
1204  return in.get_truncated_file_name();
1205  }
1206 
1207  void set_file_line(unsigned file_line){
1208  in.set_file_line(file_line);
1209  }
1210 
1211  unsigned get_file_line()const{
1212  return in.get_file_line();
1213  }
1214 
1215  private:
1216  void parse_helper(std::size_t){}
1217 
1218  template<class T, class ...ColType>
1219  void parse_helper(std::size_t r, T&t, ColType&...cols){
1220  if(row[r]){
1221  try{
1222  try{
1223  ::io::detail::parse<overflow_policy>(row[r], t);
1224  }catch(error::with_column_content&err){
1225  err.set_column_content(row[r]);
1226  throw;
1227  }
1228  }catch(error::with_column_name&err){
1229  err.set_column_name(column_names[r].c_str());
1230  throw;
1231  }
1232  }
1233  parse_helper(r+1, cols...);
1234  }
1235 
1236 
1237  public:
1238  template<class ...ColType>
1239  bool read_row(ColType& ...cols){
1240  static_assert(sizeof...(ColType)>=column_count,
1241  "not enough columns specified");
1242  static_assert(sizeof...(ColType)<=column_count,
1243  "too many columns specified");
1244  try{
1245  try{
1246 
1247  char*line;
1248  do{
1249  line = in.next_line();
1250  if(!line)
1251  return false;
1252  }while(comment_policy::is_comment(line));
1253 
1254  detail::parse_line<trim_policy, quote_policy>
1255  (line, row, col_order);
1256 
1257  parse_helper(0, cols...);
1258  }catch(error::with_file_name&err){
1259  err.set_file_name(in.get_truncated_file_name());
1260  throw;
1261  }
1262  }catch(error::with_file_line&err){
1263  err.set_file_line(in.get_file_line());
1264  throw;
1265  }
1266 
1267  return true;
1268  }
1269  };
1270 }
1271 #endif
1272 
io::set_to_max_on_overflow
Definition: csv.h:832
io::detail::OwningStdIOByteSourceBase
Definition: csv.h:150
io::detail::SynchronousReader
Definition: csv.h:295
io::no_comment
Definition: csv.h:713
io::single_line_comment
Definition: csv.h:720
io::error::integer_overflow
Definition: csv.h:645
io::empty_line_comment
Definition: csv.h:738
io::detail::NonOwningIStreamByteSource
Definition: csv.h:169
io::error::header_missing
Definition: csv.h:576
io::single_and_empty_line_comment
Definition: csv.h:752
io::error::duplicated_column_in_header
Definition: csv.h:565
io::error::with_file_name
Definition: csv.h:71
io::error::line_length_limit_exceeded
Definition: csv.h:130
io::error::integer_must_be_positive
Definition: csv.h:619
io::error::can_not_open_file
Definition: csv.h:114
io::trim_chars
Definition: csv.h:691
io::error::with_column_content
Definition: csv.h:525
io::CSVReader
Definition: csv.h:1115
io::error::too_few_columns
Definition: csv.h:586
io::error::base
Definition: csv.h:58
io::error::invalid_single_character
Definition: csv.h:671
io::throw_on_overflow
Definition: csv.h:812
io::error::extra_column_in_header
Definition: csv.h:543
io::error::too_many_columns
Definition: csv.h:597
io::detail::AsynchronousReader
Definition: csv.h:206
io::error::no_digit
Definition: csv.h:632
io::double_quote_escape
Definition: csv.h:772
io::detail::NonOwningStringByteSource
Definition: csv.h:184
io::error::with_file_line
Definition: csv.h:90
io::ByteSourceBase
Definition: csv.h:142
io::no_quote_escape
Definition: csv.h:759
io::error::integer_underflow
Definition: csv.h:658
io::error::escaped_string_not_closed
Definition: csv.h:608
io::LineReader
Definition: csv.h:320
io::ignore_overflow
Definition: csv.h:824
io::error::with_errno
Definition: csv.h:102
io::error::missing_column_in_header
Definition: csv.h:554
io::error::with_column_name
Definition: csv.h:505