diff --git a/include/rosa/support/csv/CSVReader.hpp b/include/rosa/support/csv/CSVReader.hpp index 77419b7..38826be 100755 --- a/include/rosa/support/csv/CSVReader.hpp +++ b/include/rosa/support/csv/CSVReader.hpp @@ -1,375 +1,375 @@ //===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===// // // The RoSA Framework // //===----------------------------------------------------------------------===// /// /// \file rosa/support/csv/CSVReader.hpp /// /// \author David Juhasz (david.juhasz@tuwien.ac.at) /// /// \date 2017 /// /// \brief Facitilities to read CSV files. /// /// \note The implementation is based on the solution at /// https://stackoverflow.com/a/1120224 /// //===----------------------------------------------------------------------===// #ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP #define ROSA_SUPPORT_CSV_CSVREADER_HPP #include "rosa/support/debug.hpp" #include #include #include namespace rosa { namespace csv { /// Anonymous namespace providing implementation details for /// \c rosa::csv::CSVIterator, consider it private. namespace { /// Provides facility for parsing values from one row CSV data. /// /// \tparam T type of values to parse from the line /// \tparam IsSignedInt if \p T is a signed integral type, always use default /// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use /// default /// \tparam IsFloat if \p T is a floating-point type, always use default /// \tparam IsString if \p T is \c std::string, always use default /// /// \note Specializations of this `struct` are provided for arithmentic types /// and \c std::string. template ::value && std::is_signed::value), bool IsUnsignedInt = (std::is_integral::value && std::is_unsigned::value), bool IsFloat = std::is_floating_point::value, bool IsString = std::is_same::value> struct CSVRowParser; /// Specialization for signed integral types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is a signed integral type:\code /// std::is_integral::value && std::is_signed::value /// \code template struct CSVRowParser { STATIC_ASSERT((std::is_integral::value && std::is_signed::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data) { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, ',')) { Data.push_back(std::stoll(Cell)); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Specialization for unsigned integral types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is an unsigned integral type:\code /// std::is_integral::value && std::is_unsigned::value /// \code template struct CSVRowParser { STATIC_ASSERT((std::is_integral::value && std::is_unsigned::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data) { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, ',')) { Data.push_back(std::stoull(Cell)); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Specialization for floating-point types. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is a floating-point type:\code /// std::is_floating_point::value /// \code template struct CSVRowParser { STATIC_ASSERT((std::is_floating_point::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data) { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, ',')) { Data.push_back(std::stold(Cell)); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(0); } } }; /// Specialization for \c std::string. /// /// \tparam T type of values to parse from the line /// /// \pre \p T is \c std::string:\code /// std::is_same::value /// \code template struct CSVRowParser { STATIC_ASSERT((std::is_same::value), "wrong type"); // Sanity check. /// Parses a given row of CSV data into a given container. /// /// \p Data is cleared and then filled with values parsed from \p LineStream. /// Entries in the line are to be separated by commas, the character `,`. A /// trailing comma results in an empty entry at the end of the line. No empty /// entry should be present otherwise. /// /// \param [in,out] LineStream the line to parse /// \param [in,out] Data the container to store the parsed values static void parse(std::stringstream &LineStream, std::vector &Data) { std::string Cell; Data.clear(); while (std::getline(LineStream, Cell, ',')) { Data.push_back(Cell); } // This checks for a trailing comma with no data after it. if (!LineStream && Cell.empty()) { // If there was a trailing comma then add an empty element. Data.push_back(""); } } }; /// Parses and stores entries from a row of CSV data. /// /// \tparam T type of values to parse and store, i.e. entries in the row /// /// \note The implementation relies on \c rosa::csv::CSVRowParser, which is /// implemented only for `arithmetic` types -- signed and unsigned integral and /// floating-point types -- and for \c std::string. Those are the valid values /// for \p T. template class CSVRow { public: /// Gives a constant reference for an entry at a given position of the row. /// /// \note No bounds checking is performed. /// /// \param Index the position of the entry /// /// \return constant reference for the stored entry at position \p Index const T &operator[](const size_t Index) const noexcept { return Data[Index]; } /// Tells the number of entries stored in the row. /// /// \return number of stored entries. size_t size(void) const noexcept { return Data.size(); } /// Parses and stores one row of CSV data. /// /// The function reads one line from \p Str and parses it into /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. /// /// \param [in,out] Str input stream of a CSV file void readNextRow(std::istream &Str) { std::string Line; std::getline(Str, Line); std::stringstream LineStream(Line); CSVRowParser::parse(LineStream, Data); } private: std::vector Data; ///< Stores parsed entries }; /// Reads a row of CSV data into \c rosa::csv::CSVRow. /// /// The next line is read from \p Str by calling /// \c rosa::csv::CSVRow::readNextRow on \p Data. /// /// \note A CSV file should contain no empty lines. /// /// \param [in,out] Str input stream of a CSV file /// \param [in,out] Data object to read the next line into /// /// \return \p Str after reading one line from it template std::istream &operator>>(std::istream &Str, CSVRow &Data) { Data.readNextRow(Str); return Str; } } // End namespace /// Provides `InputIterator` features for iterating over a CSV file in a /// flat way. /// /// The iterator hides rows of the CSV file, and iterates over the entries /// row-by-row. /// /// \note A CSV file should contain no empty lines. /// /// \tparam T type of values to iterate over, i.e. entries in the CSV file. /// /// \note The implementation relies on \c rosa::csv::CSVRow, which in turn /// relies on \c rosa::csv::CSVRowParser, which is implemented only for /// `arithmetic` types -- signed and unsigned integral types and floating-point /// types -- and for \c std::string. Those are the valid values for \p T. template class CSVFlatIterator { public: /// \defgroup CSVIteratorTypedefs /// /// Standard `typedef`s for iterators. /// ///@{ typedef std::input_iterator_tag iterator_category; ///< Category of the iterator. typedef T value_type; ///< Type of values iterated over. typedef std::size_t difference_type; ///< Type to identify distance. typedef T *pointer; ///< Pointer to the type iterated over. typedef T &reference; ///< Reference to the type iterated over. ///@} /// Creates a new instance. /// /// \param [in,out] S input stream to iterate over CSVFlatIterator(std::istream &S) : Str(S.good() ? &S : nullptr), Pos(-1) { // \c rosa::csv::CSVFlatIterator::Pos is initialized to `-1` so the first // incrementation here will set it properly. ++(*this); } /// Creates an empty new instance. CSVFlatIterator(void) noexcept : Str(nullptr) {} /// Pre-increment operator. /// /// The implementation moves over the entries in the current row and advances /// to the next row when the end of the current row is reached. If the end of /// the input stream is reached, the operator becomes empty and has no /// further effect. /// /// \return \p this object after incrementing it. CSVFlatIterator &operator++() { if (Str) { ++Pos; if (Pos == Row.size()) { if (!((*Str) >> Row)) { Str = nullptr; --Pos; // Stay on the last entry forever. } else { Pos = 0; } } } return *this; } /// Post-increment operator. /// /// The implementation uses the pre-increment operator and returns a copy of /// the original state of \p this object. /// /// \return \p this object before incrementing it. CSVFlatIterator operator++(int) { CSVFlatIterator Tmp(*this); ++(*this); return Tmp; } /// Returns a constant reference to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant reference to the current entry. const T &operator*(void)const noexcept { return Row[Pos]; } /// Returns a constant pointer to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant pointer to the current entry. const T *operator->(void)const noexcept { return &Row[Pos]; } /// Tells if \p this object is equal to another one. /// /// Two \c rosa::csv::CSVReader instances are equal if and only if they are /// the same or both are empty. /// /// \param RHS other object to compare to /// /// \return whether \p this object is equal with \p RHS bool operator==(const CSVFlatIterator &RHS) const noexcept { return ((this == &RHS) || ((this->Str == nullptr) && (RHS.Str == nullptr))); } /// Tells if \p this object is not equal to another one. /// /// \see rosa::csv::CSVReader::operator== /// /// \param RHS other object to compare to /// /// \return whether \p this object is not equal with \p RHS. bool operator!=(const CSVFlatIterator &RHS) const noexcept { return !((*this) == RHS); } private: std::istream *Str; ///< Input stream of a CSV file to iterate over. CSVRow Row; ///< Content of the current row iterating over. size_t Pos; ///< Current position within the current row. }; } // End namespace csv } // End namespace rosa -#endif // ROSA_SUPPOR_CSV_CSVREADER_HPP +#endif // ROSA_SUPPORT_CSV_CSVREADER_HPP