diff --git a/include/rosa/support/csv/CSVReader.hpp b/include/rosa/support/csv/CSVReader.hpp index 95c1fda..8824fac 100755 --- a/include/rosa/support/csv/CSVReader.hpp +++ b/include/rosa/support/csv/CSVReader.hpp @@ -1,480 +1,802 @@ //===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===// // // The RoSA Framework // //===----------------------------------------------------------------------===// /// /// \file rosa/support/csv/CSVReader.hpp /// /// \author David Juhasz (david.juhasz@tuwien.ac.at) /// /// \date 2017-2019 /// /// \brief Facitilities to read CSV files. /// /// \note The implementation is based on the solution at /// https://stackoverflow.com/a/1120224 /// //===----------------------------------------------------------------------===// #ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP #define ROSA_SUPPORT_CSV_CSVREADER_HPP #include "rosa/support/debug.hpp" #include #include #include #include namespace rosa { namespace csv { /// Anonymous namespace providing implementation details for /// \c rosa::csv::CSVIterator, consider it private. namespace { /// Provides facility for parsing values from one row CSV data. /// /// \tparam T type of values to parse from the line /// \tparam IsSignedInt if \p T is a signed integral type, always use default /// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use /// default /// \tparam IsFloat if \p T is a floating-point type, always use default /// \tparam IsString if \p T is \c std::string, always use default /// /// \note Specializations of this `struct` are provided for arithmentic types /// and \c std::string. template ::value && std::is_signed::value), bool IsUnsignedInt = (std::is_integral::value && std::is_unsigned::value), bool IsFloat = std::is_floating_point::value, bool IsString = std::is_same::value> struct CSVRowParser; /// Specialization for signed integral types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is a signed integral type:\code /// std::is_integral::value && std::is_signed::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_integral::value && std::is_signed::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values - static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter) { + static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(static_cast(std::stoll(Cell))); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } + + /// Parses a given column of a given row of CSV data into a given container. + /// + /// \p Data is cleared and then filled with values parsed from \p LineStream. + /// Entries in the line are to be separated by commas, the character `,`. A + /// trailing comma results in an empty entry at the end of the line. No empty + /// entry should be present otherwise. + /// + /// \note Parsed values are silently converted to type \p T. + /// + /// \param [in,out] LineStream the line to parse + /// \param [in,out] Data the container to store the parsed values + static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { + std::string Cell; + size_t currentColumn = 1; + Data.clear(); + while (std::getline(LineStream, Cell, Delimeter)) { + if(currentColumn == Column){ + Data.push_back(static_cast(std::stoll(Cell))); + break; + } + currentColumn = currentColumn + 1; + } + // This checks for a trailing comma with no data after it. + if (!LineStream && Cell.empty()) { + // If there was a trailing comma then add an empty element. + Data.push_back(0); + } + } }; /// Specialization for unsigned integral types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is an unsigned integral type:\code /// std::is_integral::value && std::is_unsigned::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_integral::value && std::is_unsigned::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(static_cast(std::stoull(Cell))); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } + + /// Parses a given column of a given row of CSV data into a given container. + /// + /// \p Data is cleared and then filled with values parsed from \p LineStream. + /// Entries in the line are to be separated by commas, the character `,`. A + /// trailing comma results in an empty entry at the end of the line. No empty + /// entry should be present otherwise. + /// + /// \note Parsed values are silently converted to type \p T. + /// + /// \param [in,out] LineStream the line to parse + /// \param [in,out] Data the container to store the parsed values + static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { + std::string Cell; + size_t currentColumn = 1; + Data.clear(); + while (std::getline(LineStream, Cell, Delimeter)) { + if(currentColumn == Column){ + Data.push_back(static_cast(std::stoll(Cell))); + break; + } + currentColumn = currentColumn + 1; + } + // This checks for a trailing comma with no data after it. + if (!LineStream && Cell.empty()) { + // If there was a trailing comma then add an empty element. + Data.push_back(0); + } + } }; /// Specialization for floating-point types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is a floating-point type:\code /// std::is_floating_point::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_floating_point::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \note Parsed values are silently converted to type \p T. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(static_cast(std::stold(Cell))); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } + + /// Parses a given column of a given row of CSV data into a given container. + /// + /// \p Data is cleared and then filled with values parsed from \p LineStream. + /// Entries in the line are to be separated by commas, the character `,`. A + /// trailing comma results in an empty entry at the end of the line. No empty + /// entry should be present otherwise. + /// + /// \note Parsed values are silently converted to type \p T. + /// + /// \param [in,out] LineStream the line to parse + /// \param [in,out] Data the container to store the parsed values + static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { + std::string Cell; + size_t currentColumn = 1; + Data.clear(); + while (std::getline(LineStream, Cell, Delimeter)) { + if(currentColumn == Column){ + Data.push_back(static_cast(std::stold(Cell))); + break; + } + currentColumn = currentColumn + 1; + } + // This checks for a trailing comma with no data after it. + if (!LineStream && Cell.empty()) { + // If there was a trailing comma then add an empty element. + Data.push_back(0); + } + } }; /// Specialization for \c std::string. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is \c std::string:\code /// std::is_same::value /// \endcode template struct CSVRowParser { STATIC_ASSERT((std::is_same::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data, char Delimeter = ',') { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, Delimeter)) { Data.push_back(Cell); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(""); } } + + /// Parses a given column of a given row of CSV data into a given container. + /// + /// \p Data is cleared and then filled with values parsed from \p LineStream. + /// Entries in the line are to be separated by commas, the character `,`. A + /// trailing comma results in an empty entry at the end of the line. No empty + /// entry should be present otherwise. + /// + /// \note Parsed values are silently converted to type \p T. + /// + /// \param [in,out] LineStream the line to parse + /// \param [in,out] Data the container to store the parsed values + static void parseValue(std::stringstream &LineStream, std::vector &Data, size_t Column = 1, char Delimeter = ',') { + std::string Cell; + size_t currentColumn = 1; + Data.clear(); + while (std::getline(LineStream, Cell, Delimeter)) { + if(currentColumn == Column){ + Data.push_back(static_cast(std::stoll(Cell))); + break; + } + currentColumn = currentColumn + 1; + } + // This checks for a trailing comma with no data after it. + if (!LineStream && Cell.empty()) { + // If there was a trailing comma then add an empty element. + Data.push_back(0); + } + } }; /// Parses and stores entries from a row of CSV data. /// /// \tparam T type of values to parse and store, i.e. entries in the row /// /// \note The implementation relies on \c rosa::csv::CSVRowParser, which is /// implemented only for `arithmetic` types -- signed and unsigned integral and /// floating-point types -- and for \c std::string. Those are the valid values /// for \p T. template class CSVRow { public: /// Gives a constant reference for an entry at a given position of the row. /// /// \note No bounds checking is performed. /// /// \param Index the position of the entry /// /// \return constant reference for the stored entry at position \p Index const T &operator[](const size_t Index) const noexcept { return Data[Index]; } /// Tells the number of entries stored in the row. /// /// \return number of stored entries. size_t size(void) const noexcept { return Data.size(); } /// Parses and stores one row of CSV data. /// /// The function reads one line from \p Str and parses it into /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. /// /// \param [in,out] Str input stream of a CSV file void readNextRow(std::istream &Str) { std::string Line; std::getline(Str, Line); std::stringstream LineStream(Line); CSVRowParser::parse(LineStream, Data, Delimeter); } bool isNumeric(const std::string& input){ return std::all_of(input.begin(), input.end(), ::isdigit); } void checkIfHeader(std::istream &Str){ std::string Line; std::getline(Str, Line); std::vector FirstRowValues; std::stringstream LineStream(Line); std::string Value; bool HasHeaderLocal = true; CSVRowParser::parse(LineStream, FirstRowValues, Delimeter); for(std::vector::iterator it = FirstRowValues.begin(); it != FirstRowValues.end(); ++it){ Value = *it; if(isNumeric(Value)){ HasHeaderLocal = false; } } if(HasHeaderLocal){ Header.swap(FirstRowValues); /* // only for debugging purpose. for(std::vector::iterator it = Header.begin(); it != Header.end(); ++it){ std::cout << *it << ", "; } std::cout << std::endl; */ }else { std::stringstream LineStream2(Line); CSVRowParser::parse(LineStream2, Data, Delimeter); } HasHeader = HasHeaderLocal; isFirstRow = false; } bool isHavingHeader(){ return HasHeader; } inline void setDelimeter(char Delimeter){ this->Delimeter = Delimeter; } inline char getDelimeter(){ return this->Delimeter; } inline void setEndOfLine(char EndOfLine){ this->EndOfLine = EndOfLine; } inline char getEndOfLine(){ return this->EndOfLine; } inline bool isThisFirstRow(){ return this->isFirstRow; } + inline void setColumn(const size_t & Column){ + this->Column = Column; + } + +private: + std::vector Data; ///< Stores parsed entries + uint64_t RowNumber = 0; ///< Current row number + bool isFirstRow = true; ///< Is this the first row + bool isFirstRowRead = false; ///< Is the first row read already + bool HasHeader = false; ///< Has the current csv file a header + std::vector Header; ///< Stores the header entries if available + char Delimeter = ','; ///< Stores the delimeter between data entries + char EndOfLine = '\n'; ///< Stores the end of line character + size_t Column = 1; ///< Stores the column to get the data out of the row +}; + +/// Parses and stores entries from a row of CSV data. +/// It parses an entire row and takes only the value of the corresponding column. +/// +/// \tparam T type of values to parse and store, i.e. entries in the row +/// +/// \note The implementation relies on \c rosa::csv::CSVRowParser, which is +/// implemented only for `arithmetic` types -- signed and unsigned integral and +/// floating-point types -- and for \c std::string. Those are the valid values +/// for \p T. +template +class CSVValue { +public: + /// Gives a constant reference for an entry at a given position of the row. + /// + /// \note No bounds checking is performed. + /// + /// \param Index the position of the entry + /// + /// \return constant reference for the stored entry at position \p Index + const T &operator[](const size_t Index) const noexcept { return Data[Index]; } + + /// Tells the number of entries stored in the row. + /// + /// \return number of stored entries. + size_t size(void) const noexcept { return Data.size(); } + + /// Parses and stores one row of CSV data. + /// + /// The function reads one line from \p Str and parses it into + /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. + /// + /// \param [in,out] Str input stream of a CSV file + void readNextValue(std::istream &Str) { + std::string Line; + std::getline(Str, Line); + std::stringstream LineStream(Line); + + CSVRowParser::parseValue(LineStream, Data, Column, Delimeter); + } + + bool isNumeric(const std::string& input){ + return std::all_of(input.begin(), input.end(), ::isdigit); + } + + void checkIfHeader(std::istream &Str){ + std::string Line; + std::getline(Str, Line); + std::vector FirstRowValues; + std::stringstream LineStream(Line); + std::string Value; + bool HasHeaderLocal = true; + + CSVRowParser::parse(LineStream, FirstRowValues, Delimeter); + for(std::vector::iterator it = FirstRowValues.begin(); + it != FirstRowValues.end(); ++it){ + Value = *it; + if(isNumeric(Value)){ + HasHeaderLocal = false; + } + } + if(HasHeaderLocal){ + Header.swap(FirstRowValues); + /* // only for debugging purpose. + for(std::vector::iterator it = Header.begin(); + it != Header.end(); ++it){ + std::cout << *it << ", "; + } + std::cout << std::endl; */ + }else { + std::stringstream LineStream2(Line); + CSVRowParser::parseValue(LineStream2, Data, Delimeter, Column); + } + HasHeader = HasHeaderLocal; + isFirstRow = false; + } + + bool isHavingHeader(){ + return HasHeader; + } + + inline void setDelimeter(char Delimeter){ + this->Delimeter = Delimeter; + } + + inline char getDelimeter(){ + return this->Delimeter; + } + + inline void setEndOfLine(char EndOfLine){ + this->EndOfLine = EndOfLine; + } + + inline char getEndOfLine(){ + return this->EndOfLine; + } + + inline bool isThisFirstRow(){ + return this->isFirstRow; + } + + inline void setColumn(const size_t & Column){ + this->Column = Column; + } + private: std::vector Data; ///< Stores parsed entries uint64_t RowNumber = 0; ///< Current row number - bool isFirstRow = true; ///< is this the first row - bool isFirstRowRead = false; ///< is the first row read already + bool isFirstRow = true; ///< Is this the first row + bool isFirstRowRead = false; ///< Is the first row read already bool HasHeader = false; ///< Has the current csv file a header - std::vector Header; /// < stores the header entries if available - char Delimeter = ','; /// < stores the delimeter between data entries - char EndOfLine = '\n'; ///< stores the end of line character + std::vector Header; ///< Stores the header entries if available + char Delimeter = ','; ///< Stores the delimeter between data entries + char EndOfLine = '\n'; ///< Stores the end of line character + size_t Column = 1; ///< Stores the column to get the data out of the row }; + /// Reads a row of CSV data into \c rosa::csv::CSVRow. /// /// The next line is read from \p Str by calling /// \c rosa::csv::CSVRow::readNextRow on \p Data. /// /// \note A CSV file should contain no empty lines. /// /// \param [in,out] Str input stream of a CSV file /// \param [in,out] Data object to read the next line into /// /// \return \p Str after reading one line from it template std::istream &operator>>(std::istream &Str, CSVRow &Data) { if (Data.isThisFirstRow()){ Data.checkIfHeader(Str); if(Data.isHavingHeader()){ Data.readNextRow(Str); } }else { Data.readNextRow(Str); } /* // just for debugging purpose char c; while(Str.get(c)){ std::cout << c; } std::cout << std::endl; */ return Str; } +/// Reads a value of CSV data into \c rosa::csv::CSVValue. +/// +/// The next line is read from \p Str by calling +/// \c rosa::csv::CSVValue::readNextValue on \p Data. +/// +/// \note A CSV file should contain no empty lines. +/// +/// \param [in,out] Str input stream of a CSV file +/// \param [in,out] Data object to read the next line into +/// +/// \return \p Str after reading one line from it +template +std::istream &operator>>(std::istream &Str, CSVValue &Data) { + if (Data.isThisFirstRow()){ + Data.checkIfHeader(Str); + if(Data.isHavingHeader()){ + Data.readNextValue(Str); + } + }else { + Data.readNextValue(Str); + } + /* // just for debugging purpose + char c; + while(Str.get(c)){ + std::cout << c; + } + std::cout << std::endl; + */ + return Str; +} + } // End namespace /// Provides `InputIterator` features for iterating over a CSV file in a /// flat way. /// /// The iterator hides rows of the CSV file, and iterates over the entries /// row-by-row. /// /// \note A CSV file should contain no empty lines. /// /// \tparam T type of values to iterate over, i.e. entries in the CSV file. /// /// \note The implementation relies on \c rosa::csv::CSVRow, which in turn /// relies on \c rosa::csv::CSVRowParser, which is implemented only for /// `arithmetic` types -- signed and unsigned integral types and floating-point /// types -- and for \c std::string. Those are the valid values for \p T. template class CSVFlatIterator { public: /// \defgroup CSVFlatIteratorTypedefs Typedefs of rosa::csv::CSVFlatIterator /// /// Standard `typedef`s for iterators. /// ///@{ typedef std::input_iterator_tag iterator_category; ///< Category of the iterator. typedef T value_type; ///< Type of values iterated over. typedef std::size_t difference_type; ///< Type to identify distance. typedef T *pointer; ///< Pointer to the type iterated over. typedef T &reference; ///< Reference to the type iterated over. ///@} /// Creates a new instance. /// /// \param [in,out] S input stream to iterate over - CSVFlatIterator(std::istream &S) + CSVFlatIterator(std::istream &S, size_t Column = 1, bool DataRow = true, + const char Delimeter = ',', const char EndOfLine = '\n') + : Str(S.good() ? &S : nullptr), + Pos((size_t)(-1)), + Delimeter(Delimeter), EndOfLine(EndOfLine), + Column(Column), DataRow(DataRow){ + Row.setDelimeter(Delimeter); + Row.setEndOfLine(EndOfLine); + Row.setColumn(Column); + + Value.setDelimeter(Delimeter); + Value.setEndOfLine(EndOfLine); + Value.setColumn(Column); + // \c rosa::csv::CSVFlatIterator::Pos is initialized to `-1` so the first + // incrementation here will set it properly. + ++(*this); + } + + /// Creates a new instance. + /// + /// \param [in,out] S input stream to iterate over + CSVFlatIterator(std::istream &S, size_t Column = 1) : Str(S.good() ? &S : nullptr), Pos((size_t)(-1)), - Delimeter(','), EndOfLine('\n') { + Delimeter(','), EndOfLine('\n'), + Column(Column), DataRow(true){ Row.setDelimeter(Delimeter); Row.setEndOfLine(EndOfLine); + Row.setColumn(Column); + + Value.setDelimeter(Delimeter); + Value.setEndOfLine(EndOfLine); + Value.setColumn(Column); // \c rosa::csv::CSVFlatIterator::Pos is initialized to `-1` so the first // incrementation here will set it properly. ++(*this); } /// Creates an empty new instance. - CSVFlatIterator(void) noexcept : Str(nullptr) {} + CSVFlatIterator(void) noexcept : Str(nullptr), DataRow(true) {} /// Pre-increment operator. /// /// The implementation moves over the entries in the current row and advances /// to the next row when the end of the current row is reached. If the end of /// the input stream is reached, the operator becomes empty and has no /// further effect. /// /// \return \p this object after incrementing it. CSVFlatIterator &operator++() { if (Str) { ++Pos; - if (Pos == Row.size()) { - if (!((*Str) >> Row)) { - Str = nullptr; - --Pos; // Stay on the last entry forever. - } else { - Pos = 0; + if(DataRow){ + if (Pos == Row.size()) { + if (!((*Str) >> Row)) { + Str = nullptr; + --Pos; // Stay on the last entry forever. + } else { + Pos = 0; + } + } + }else{ + if (Pos == Value.size()) { + if (!((*Str) >> Value)) { + Str = nullptr; + --Pos; // Stay on the last entry forever. + } else { + Pos = 0; + } + } } - } + } return *this; } /// Post-increment operator. /// /// The implementation uses the pre-increment operator and returns a copy of /// the original state of \p this object. /// /// \return \p this object before incrementing it. CSVFlatIterator operator++(int) { CSVFlatIterator Tmp(*this); ++(*this); return Tmp; } /// Returns a constant reference to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// - /// \return constant reference to the current entry. - const T &operator*(void)const noexcept { return Row[Pos]; } + /// \return constanStartt reference to the current entry. + const T &operator*(void)const noexcept { + if(DataRow){ + return Row[Pos]; + }else { + return Value[Pos]; + } + } /// Returns a constant pointer to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant pointer to the current entry. - const T *operator->(void)const noexcept { return &Row[Pos]; } + const T *operator->(void)const noexcept { + if(DataRow){ + return &Row[Pos]; + }else { + return &Value[Pos]; + } + } /// Tells if \p this object is equal to another one. /// /// Two \c rosa::csv::CSVReader instances are equal if and only if they are /// the same or both are empty. /// /// \param RHS other object to compare to /// /// \return whether \p this object is equal with \p RHS bool operator==(const CSVFlatIterator &RHS) const noexcept { return ((this == &RHS) || ((this->Str == nullptr) && (RHS.Str == nullptr))); } /// Tells if \p this object is not equal to another one. /// /// \see rosa::csv::CSVReader::operator== /// /// \param RHS other object to compare to /// /// \return whether \p this object is not equal with \p RHS. bool operator!=(const CSVFlatIterator &RHS) const noexcept { return !((*this) == RHS); } inline void setDelimeter(char Delimter){ this->Delimeter = Delimter; } inline char getDelimeter(){ return this->Delimeter; } private: std::istream *Str; ///< Input stream of a CSV file to iterate over. CSVRow Row; ///< Content of the current row iterating over. + CSVValue Value; ///< Content of the specified column in the current row. size_t Pos; ///< Current position within the current row. char Delimeter; ///< Delimeter between the entries char EndOfLine; ///< stores the end of line character - + size_t Column; ///< Index of the column to get data out of it. + bool DataRow; ///< Indicates if you want to read a row or only one column out of a row. true = use CSVRow }; } // End namespace csv } // End namespace rosa #endif // ROSA_SUPPORT_CSV_CSVREADER_HPP