diff --git a/include/rosa/support/csv/CSVReader.hpp b/include/rosa/support/csv/CSVReader.hpp index 8824fac..3275dc4 100755 --- a/include/rosa/support/csv/CSVReader.hpp +++ b/include/rosa/support/csv/CSVReader.hpp @@ -1,802 +1,782 @@ //===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===// // // The RoSA Framework // //===----------------------------------------------------------------------===// /// /// \file rosa/support/csv/CSVReader.hpp /// /// \author David Juhasz (david.juhasz@tuwien.ac.at) /// /// \date 2017-2019 /// /// \brief Facitilities to read CSV files. /// /// \note The implementation is based on the solution at /// https://stackoverflow.com/a/1120224 /// //===----------------------------------------------------------------------===// #ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP #define ROSA_SUPPORT_CSV_CSVREADER_HPP #include "rosa/support/debug.hpp" #include #include #include #include namespace rosa { namespace csv { /// Anonymous namespace providing implementation details for /// \c rosa::csv::CSVIterator, consider it private. namespace { /// Provides facility for parsing values from one row CSV data. /// /// \tparam T type of values to parse from the line /// \tparam IsSignedInt if \p T is a signed integral type, always use default /// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use /// default /// \tparam IsFloat if \p T is a floating-point type, always use default /// \tparam IsString if \p T is \c std::string, always use default /// /// \note Specializations of this `struct` are provided for arithmentic types /// and \c std::string. template ::value && std::is_signed::value), bool IsUnsignedInt = (std::is_integral::value && std::is_unsigned::value), bool IsFloat = std::is_floating_point::value, bool IsString = std::is_same::value> struct CSVRowParser; /// Specialization for signed integral types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is a signed integral type:\code /// std::is_integral::value && std::is_signed::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_integral::value && std::is_signed::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(static_cast(std::stoll(Cell))); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } /// Parses a given column of a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { std::string Cell; size_t currentColumn = 1; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { if(currentColumn == Column){ Data.push_back(static_cast(std::stoll(Cell))); break; } currentColumn = currentColumn + 1; } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Specialization for unsigned integral types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is an unsigned integral type:\code /// std::is_integral::value && std::is_unsigned::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_integral::value && std::is_unsigned::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(static_cast(std::stoull(Cell))); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } /// Parses a given column of a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { std::string Cell; size_t currentColumn = 1; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { if(currentColumn == Column){ Data.push_back(static_cast(std::stoll(Cell))); break; } currentColumn = currentColumn + 1; } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Specialization for floating-point types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is a floating-point type:\code /// std::is_floating_point::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_floating_point::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(static_cast(std::stold(Cell))); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } /// Parses a given column of a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { std::string Cell; size_t currentColumn = 1; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { if(currentColumn == Column){ Data.push_back(static_cast(std::stold(Cell))); break; } currentColumn = currentColumn + 1; } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Specialization for \c std::string. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is \c std::string:\code /// std::is_same::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_same::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(Cell); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(""); } } /// Parses a given column of a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { std::string Cell; size_t currentColumn = 1; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { if(currentColumn == Column){ Data.push_back(static_cast(std::stoll(Cell))); break; } currentColumn = currentColumn + 1; } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Parses and stores entries from a row of CSV data. /// /// \tparam T type of values to parse and store, i.e. entries in the row /// /// \note The implementation relies on \c rosa::csv::CSVRowParser, which is /// implemented only for `arithmetic` types -- signed and unsigned integral and /// floating-point types -- and for \c std::string. Those are the valid values /// for \p T. template class CSVRow { public: /// Gives a constant reference for an entry at a given position of the row. /// /// \note No bounds checking is performed. /// /// \param Index the position of the entry /// /// \return constant reference for the stored entry at position \p Index const T &operator[](const size_t Index) const noexcept { return Data[Index]; } /// Tells the number of entries stored in the row. /// /// \return number of stored entries. size_t size(void) const noexcept { return Data.size(); } /// Parses and stores one row of CSV data. /// /// The function reads one line from \p Str and parses it into /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. /// /// \param [in,out] Str input stream of a CSV file void readNextRow(std::istream &Str) { std::string Line; std::getline(Str, Line); std::stringstream LineStream(Line); CSVRowParser::parse(LineStream, Data, Delimeter); } bool isNumeric(const std::string& input){ return std::all_of(input.begin(), input.end(), ::isdigit); } void checkIfHeader(std::istream &Str){ std::string Line; std::getline(Str, Line); std::vector FirstRowValues; std::stringstream LineStream(Line); std::string Value; bool HasHeaderLocal = true; CSVRowParser::parse(LineStream, FirstRowValues, Delimeter); for(std::vector::iterator it = FirstRowValues.begin(); it != FirstRowValues.end(); ++it){ Value = *it; if(isNumeric(Value)){ HasHeaderLocal = false; } } if(HasHeaderLocal){ Header.swap(FirstRowValues); /* // only for debugging purpose. for(std::vector::iterator it = Header.begin(); it != Header.end(); ++it){ std::cout << *it << ", "; } std::cout << std::endl; */ }else { std::stringstream LineStream2(Line); CSVRowParser::parse(LineStream2, Data, Delimeter); } HasHeader = HasHeaderLocal; isFirstRow = false; } bool isHavingHeader(){ return HasHeader; } inline void setDelimeter(char Delimeter){ this->Delimeter = Delimeter; } inline char getDelimeter(){ return this->Delimeter; } inline void setEndOfLine(char EndOfLine){ this->EndOfLine = EndOfLine; } inline char getEndOfLine(){ return this->EndOfLine; } inline bool isThisFirstRow(){ return this->isFirstRow; } inline void setColumn(const size_t & Column){ this->Column = Column; } private: std::vector Data; ///< Stores parsed entries uint64_t RowNumber = 0; ///< Current row number bool isFirstRow = true; ///< Is this the first row bool isFirstRowRead = false; ///< Is the first row read already bool HasHeader = false; ///< Has the current csv file a header std::vector Header; ///< Stores the header entries if available char Delimeter = ','; ///< Stores the delimeter between data entries char EndOfLine = '\n'; ///< Stores the end of line character size_t Column = 1; ///< Stores the column to get the data out of the row }; /// Parses and stores entries from a row of CSV data. /// It parses an entire row and takes only the value of the corresponding column. /// /// \tparam T type of values to parse and store, i.e. entries in the row /// /// \note The implementation relies on \c rosa::csv::CSVRowParser, which is /// implemented only for `arithmetic` types -- signed and unsigned integral and /// floating-point types -- and for \c std::string. Those are the valid values /// for \p T. template class CSVValue { public: /// Gives a constant reference for an entry at a given position of the row. /// /// \note No bounds checking is performed. /// /// \param Index the position of the entry /// /// \return constant reference for the stored entry at position \p Index const T &operator[](const size_t Index) const noexcept { return Data[Index]; } /// Tells the number of entries stored in the row. /// /// \return number of stored entries. size_t size(void) const noexcept { return Data.size(); } /// Parses and stores one row of CSV data. /// /// The function reads one line from \p Str and parses it into /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. /// /// \param [in,out] Str input stream of a CSV file void readNextValue(std::istream &Str) { std::string Line; std::getline(Str, Line); std::stringstream LineStream(Line); CSVRowParser::parseValue(LineStream, Data, Column, Delimeter); } bool isNumeric(const std::string& input){ return std::all_of(input.begin(), input.end(), ::isdigit); } void checkIfHeader(std::istream &Str){ std::string Line; std::getline(Str, Line); std::vector FirstRowValues; std::stringstream LineStream(Line); std::string Value; bool HasHeaderLocal = true; CSVRowParser::parse(LineStream, FirstRowValues, Delimeter); for(std::vector::iterator it = FirstRowValues.begin(); it != FirstRowValues.end(); ++it){ Value = *it; if(isNumeric(Value)){ HasHeaderLocal = false; } } if(HasHeaderLocal){ Header.swap(FirstRowValues); /* // only for debugging purpose. for(std::vector::iterator it = Header.begin(); it != Header.end(); ++it){ std::cout << *it << ", "; } std::cout << std::endl; */ }else { std::stringstream LineStream2(Line); CSVRowParser::parseValue(LineStream2, Data, Delimeter, Column); } HasHeader = HasHeaderLocal; isFirstRow = false; } bool isHavingHeader(){ return HasHeader; } inline void setDelimeter(char Delimeter){ this->Delimeter = Delimeter; } inline char getDelimeter(){ return this->Delimeter; } inline void setEndOfLine(char EndOfLine){ this->EndOfLine = EndOfLine; } inline char getEndOfLine(){ return this->EndOfLine; } inline bool isThisFirstRow(){ return this->isFirstRow; } inline void setColumn(const size_t & Column){ this->Column = Column; } private: std::vector Data; ///< Stores parsed entries uint64_t RowNumber = 0; ///< Current row number bool isFirstRow = true; ///< Is this the first row bool isFirstRowRead = false; ///< Is the first row read already bool HasHeader = false; ///< Has the current csv file a header std::vector Header; ///< Stores the header entries if available char Delimeter = ','; ///< Stores the delimeter between data entries char EndOfLine = '\n'; ///< Stores the end of line character size_t Column = 1; ///< Stores the column to get the data out of the row }; /// Reads a row of CSV data into \c rosa::csv::CSVRow. /// /// The next line is read from \p Str by calling /// \c rosa::csv::CSVRow::readNextRow on \p Data. /// /// \note A CSV file should contain no empty lines. /// /// \param [in,out] Str input stream of a CSV file /// \param [in,out] Data object to read the next line into /// /// \return \p Str after reading one line from it template std::istream &operator>>(std::istream &Str, CSVRow &Data) { if (Data.isThisFirstRow()){ Data.checkIfHeader(Str); if(Data.isHavingHeader()){ Data.readNextRow(Str); } }else { Data.readNextRow(Str); } /* // just for debugging purpose char c; while(Str.get(c)){ std::cout << c; } std::cout << std::endl; */ return Str; } /// Reads a value of CSV data into \c rosa::csv::CSVValue. /// /// The next line is read from \p Str by calling /// \c rosa::csv::CSVValue::readNextValue on \p Data. /// /// \note A CSV file should contain no empty lines. /// /// \param [in,out] Str input stream of a CSV file /// \param [in,out] Data object to read the next line into /// /// \return \p Str after reading one line from it template std::istream &operator>>(std::istream &Str, CSVValue &Data) { if (Data.isThisFirstRow()){ Data.checkIfHeader(Str); if(Data.isHavingHeader()){ Data.readNextValue(Str); } }else { Data.readNextValue(Str); } /* // just for debugging purpose char c; while(Str.get(c)){ std::cout << c; } std::cout << std::endl; */ return Str; } } // End namespace /// Provides `InputIterator` features for iterating over a CSV file in a /// flat way. /// /// The iterator hides rows of the CSV file, and iterates over the entries /// row-by-row. /// /// \note A CSV file should contain no empty lines. /// /// \tparam T type of values to iterate over, i.e. entries in the CSV file. /// /// \note The implementation relies on \c rosa::csv::CSVRow, which in turn /// relies on \c rosa::csv::CSVRowParser, which is implemented only for /// `arithmetic` types -- signed and unsigned integral types and floating-point /// types -- and for \c std::string. Those are the valid values for \p T. template class CSVFlatIterator { public: /// \defgroup CSVFlatIteratorTypedefs Typedefs of rosa::csv::CSVFlatIterator /// /// Standard `typedef`s for iterators. /// ///@{ typedef std::input_iterator_tag iterator_category; ///< Category of the iterator. typedef T value_type; ///< Type of values iterated over. typedef std::size_t difference_type; ///< Type to identify distance. typedef T *pointer; ///< Pointer to the type iterated over. typedef T &reference; ///< Reference to the type iterated over. ///@} /// Creates a new instance. /// /// \param [in,out] S input stream to iterate over CSVFlatIterator(std::istream &S, size_t Column = 1, bool DataRow = true, const char Delimeter = ',', const char EndOfLine = '\n') : Str(S.good() ? &S : nullptr), Pos((size_t)(-1)), Delimeter(Delimeter), EndOfLine(EndOfLine), Column(Column), DataRow(DataRow){ Row.setDelimeter(Delimeter); Row.setEndOfLine(EndOfLine); Row.setColumn(Column); Value.setDelimeter(Delimeter); Value.setEndOfLine(EndOfLine); Value.setColumn(Column); // \c rosa::csv::CSVFlatIterator::Pos is initialized to `-1` so the first // incrementation here will set it properly. ++(*this); } - /// Creates a new instance. - /// - /// \param [in,out] S input stream to iterate over - CSVFlatIterator(std::istream &S, size_t Column = 1) - : Str(S.good() ? &S : nullptr), - Pos((size_t)(-1)), - Delimeter(','), EndOfLine('\n'), - Column(Column), DataRow(true){ - Row.setDelimeter(Delimeter); - Row.setEndOfLine(EndOfLine); - Row.setColumn(Column); - - Value.setDelimeter(Delimeter); - Value.setEndOfLine(EndOfLine); - Value.setColumn(Column); - // \c rosa::csv::CSVFlatIterator::Pos is initialized to `-1` so the first - // incrementation here will set it properly. - ++(*this); - } - /// Creates an empty new instance. CSVFlatIterator(void) noexcept : Str(nullptr), DataRow(true) {} /// Pre-increment operator. /// /// The implementation moves over the entries in the current row and advances /// to the next row when the end of the current row is reached. If the end of /// the input stream is reached, the operator becomes empty and has no /// further effect. /// /// \return \p this object after incrementing it. CSVFlatIterator &operator++() { if (Str) { ++Pos; if(DataRow){ if (Pos == Row.size()) { if (!((*Str) >> Row)) { Str = nullptr; --Pos; // Stay on the last entry forever. } else { Pos = 0; } } }else{ if (Pos == Value.size()) { if (!((*Str) >> Value)) { Str = nullptr; --Pos; // Stay on the last entry forever. } else { Pos = 0; } } } } return *this; } /// Post-increment operator. /// /// The implementation uses the pre-increment operator and returns a copy of /// the original state of \p this object. /// /// \return \p this object before incrementing it. CSVFlatIterator operator++(int) { CSVFlatIterator Tmp(*this); ++(*this); return Tmp; } /// Returns a constant reference to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constanStartt reference to the current entry. const T &operator*(void)const noexcept { if(DataRow){ return Row[Pos]; }else { return Value[Pos]; } } /// Returns a constant pointer to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant pointer to the current entry. const T *operator->(void)const noexcept { if(DataRow){ return &Row[Pos]; }else { return &Value[Pos]; } } /// Tells if \p this object is equal to another one. /// /// Two \c rosa::csv::CSVReader instances are equal if and only if they are /// the same or both are empty. /// /// \param RHS other object to compare to /// /// \return whether \p this object is equal with \p RHS bool operator==(const CSVFlatIterator &RHS) const noexcept { return ((this == &RHS) || ((this->Str == nullptr) && (RHS.Str == nullptr))); } /// Tells if \p this object is not equal to another one. /// /// \see rosa::csv::CSVReader::operator== /// /// \param RHS other object to compare to /// /// \return whether \p this object is not equal with \p RHS. bool operator!=(const CSVFlatIterator &RHS) const noexcept { return !((*this) == RHS); } inline void setDelimeter(char Delimter){ this->Delimeter = Delimter; } inline char getDelimeter(){ return this->Delimeter; } private: std::istream *Str; ///< Input stream of a CSV file to iterate over. CSVRow Row; ///< Content of the current row iterating over. CSVValue Value; ///< Content of the specified column in the current row. size_t Pos; ///< Current position within the current row. char Delimeter; ///< Delimeter between the entries char EndOfLine; ///< stores the end of line character size_t Column; ///< Index of the column to get data out of it. bool DataRow; ///< Indicates if you want to read a row or only one column out of a row. true = use CSVRow }; } // End namespace csv } // End namespace rosa #endif // ROSA_SUPPORT_CSV_CSVREADER_HPP