diff --git a/include/rosa/support/csv/CSVReader.hpp b/include/rosa/support/csv/CSVReader.hpp old mode 100755 new mode 100644 index 2c33357..0094137 --- a/include/rosa/support/csv/CSVReader.hpp +++ b/include/rosa/support/csv/CSVReader.hpp @@ -1,488 +1,488 @@ //===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===// // // The RoSA Framework // // Distributed under the terms and conditions of the Boost Software License 1.0. // See accompanying file LICENSE. // // If you did not receive a copy of the license file, see // http://www.boost.org/LICENSE_1_0.txt. // //===----------------------------------------------------------------------===// /// /// \file rosa/support/csv/CSVReader.hpp /// /// \authors David Juhasz (david.juhasz@tuwien.ac.at), Edwin Willegger /// (edwin.willegger@tuwien.ac.at) /// /// \date 2017-2019 /// /// \brief Facitilities to read CSV files. /// /// \note The implementation is based on the solution at /// https://stackoverflow.com/a/1120224 /// //===----------------------------------------------------------------------===// #ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP #define ROSA_SUPPORT_CSV_CSVREADER_HPP #include "rosa/support/debug.hpp" #include "rosa/support/sequence.hpp" #include #include #include #include #include #include namespace rosa { namespace csv { /// Indicating it the CSV file contains any header or not enum class HeaderInformation { HasHeader, HasNoHeader }; /// Anonymous namespace providing implementation details for /// \c rosa::csv::CSVIterator, consider it private. namespace { /// Provides facility for parsing one value from a string. /// /// \tparam T type of value to parse /// \tparam IsSignedInt if \p T is a signed integral type, always use default /// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use /// default /// \tparam IsFloat if \p T is a floating-point type, always use default /// \tparam IsString if \p T is \c std::string, always use default /// /// \note Specializations of this struct are provided for arithmentic types /// and \c std::string. template ::value && std::is_signed::value), bool IsUnsignedInt = (std::is_integral::value && std::is_unsigned::value), bool IsFloat = std::is_floating_point::value, bool IsString = std::is_same::value> struct ValueParser { /// /// /// \param Cell the \c std::string to parse /// /// \return the parsed value /// /// \note The function silently fails if cannot parse \p Cell for type \p T. static T parse(const std::string &Cell) noexcept; }; template struct ValueParser { STATIC_ASSERT((std::is_integral::value && std::is_signed::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return static_cast(std::stoll(Cell)); } }; template struct ValueParser { STATIC_ASSERT((std::is_integral::value && std::is_unsigned::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return static_cast(std::stoull(Cell)); } }; template struct ValueParser { STATIC_ASSERT((std::is_floating_point::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return static_cast(std::stold(Cell)); } }; template struct ValueParser { STATIC_ASSERT((std::is_same::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return Cell; } }; /// Parses and stores entries from a row of CSV data. /// /// \tparam Ts types of values to parse and store, i.e. entries in the row /// /// \note The implementation relies on \c rosa::csv::CSVRowParser, which is /// implemented only for `arithmetic` types -- signed and unsigned integral /// and floating-point types -- and for \c std::string. Those are the valid /// values for \p Ts. template class CSVRow { private: /// Parses a given row of CSV data into \c CSVRow::Data. /// /// \ CSVRow::Data is filled with values parsed from \p LineStream. Entries /// in the line are to be separated by commas, the character `,`. /// /// \note Parsed values are silently converted to types \p Ts. /// /// \note Parsing silently fails if values do not match \p Ts. /// /// \tparam S0 indices to access tuple elements. /// /// \param [in,out] LineStream the line to parse /// /// \note The last argument is used only to get \p S0, the actual value of /// the parameter is ignored. template void parseRow(std::stringstream &LineStream, char Delimiter, Seq) { STATIC_ASSERT(sizeof...(Ts) == sizeof...(S0), "Not matching template arguments."); std::string Cell; // Get fields and parse the values into the proper element of the tuple // one by one in a fold expression. ((std::getline(LineStream, Cell, Delimiter), std::get(Data) = ValueParser::parse(Cell)), ...); } public: /// Constructor with all possible parameters /// /// The function creates an instance of an CSVRow object and sets the /// attributes of the /// object to the values of the parameters. /// /// \param SkipRows the number of data rows to skip, not taking header into /// account. /// \param HeaderInfo is the first line of the file a header row or not. /// \param Delimiter to seperate between the data entries within one row. CSVRow(const size_t SkipRows = 0, const HeaderInformation HeaderInfo = HeaderInformation::HasHeader, const char Delimiter = ',') : SkipRows(SkipRows), HeaderInfo(HeaderInfo), Delimiter(Delimiter), RowNumber(0), IsHeaderRead(false) {} /// Parses and stores one row of CSV data. /// /// The function reads one line from \p Str and parses it into /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. /// /// \param [in,out] Str input stream of a CSV file void readNextRow(std::istream &Str) noexcept { std::string Line; std::getline(Str, Line); if (Line.size() > 0) { std::stringstream LineStream(Line); parseRow(LineStream, Delimiter, seq_t()); RowNumber = RowNumber + 1; } } /// Read header row and stores it as \p std::string. /// /// The function reads the first line of the csv file and stores the entries /// in a vector. /// /// \param [in,out] Str input stream of a CSV file void readHeader(std::istream &Str) noexcept { std::string Line; std::getline(Str, Line); std::stringstream LineStream(Line); std::string Value; while (getline(LineStream, Value, Delimiter)) { Header.push_back(Value); } IsHeaderRead = true; } /// The number of rows to skip once. /// /// This function returns the number of data rows to skip /// at the beginning of the file. /// /// \return The number of rows to skip at the beginning of a csv file. inline size_t SkipNumRows() const noexcept { return this->SkipRows; } /// The current row number within the csv file. /// /// This function returns the current row number. The header /// row is not counted as a row. /// /// \returns the current row number within the csv file. inline size_t CurRow() const noexcept { return this->RowNumber; } /// Indiciates if the header was already read. /// /// This function returns true, if the header of a csv file which contains /// a header file is already read. /// The user has to pass in the attribute HeaderInfo the information if the /// file has in the first row the header row or not. /// /// \return if the header of a file is already read. inline bool IsHeaderReadDone() const noexcept { return this->IsHeaderRead; } /// Indicates if the file contains a header row in the first row. /// /// This function returns if the file contains a header row. /// The information if the file contains a header row or not, has to be passed /// by the user. /// The standard value is HeaderInformation::HasHeader /// /// \return if the csv file contains a header row in the first line of the /// file. inline HeaderInformation HasFileHeader() const noexcept { return this->HeaderInfo; } /// Set the number of rows to skip. /// /// This function sets the number of rows to skip at the beginning of /// the reading of the file. /// /// \param SkipRowsBeginning the number of rows you want to skip at the beginning of the file. inline void SetSkipRows(const size_t SkipRowsBeginning) noexcept { this->SkipRows = SkipRowsBeginning; } /// Is the first row a header row or not. /// /// This function sets the information, if the first row of the csv file /// is a header line or not. /// /// \param HeaderInf if the first row is a header row or not. inline void SetHeaderInfo(const HeaderInformation HeaderInf) noexcept { this->HeaderInfo = HeaderInf; } /// Set the seperator between data entries. /// /// This funcction sets the separator between the data entries of the csv /// file. /// /// \param separator the character that separates the data values. - inline void SetDelimiter(char separator) { - this->Delimiter = separator; + inline void SetDelimeter(char separator) { + this->Delimeter = separator; } /// Gives a constant references for the \c std::tuple containing the values /// read by \p this object. /// /// \return \c CSVRow::Data const std::tuple &tuple(void) const noexcept { return Data; } private: std::tuple Data; ///< Stores parsed entries size_t SkipRows; ///< The number of rows to skip at the very beginning of the /// file. ///< This number only applies on the number of data rows. ///< If your file contains a header row and data rows, the skiping ///< of the header row is not taken into account. HeaderInformation HeaderInfo; ///< If the file contains a header row or not. char Delimiter; ///< The seperator between the data entries. size_t RowNumber; ///< Current row number, counts all row numbers including /// the header row. bool IsHeaderRead; ///< Was the header read or not. std::vector Header; ///< The content of the header row. }; /// Reads a row of CSV data into \c rosa::csv::CSVRow. /// /// The next line is read from \p Str by calling /// \c rosa::csv::CSVRow::readNextRow on \p Data until all lines are /// skipped. /// /// If the function is called for the first time and the file contains /// a header than is the header and the first data row read in after the /// number of rows that the user wants to skip. /// /// \tparam Ts type of values to read from the row /// /// \note The CSV file should contain a line with fields matching \p Ts... /// /// \param [in,out] Str input stream of a CSV file /// \param [in,out] Data object to read the next line into /// /// \return \p Str after reading one line from it template std::istream &operator>>(std::istream &Str, CSVRow &Data) { if (Data.HasFileHeader() == HeaderInformation::HasHeader && !Data.IsHeaderReadDone()) { Data.readHeader(Str); } while (Data.CurRow() < (Data.SkipNumRows())) { Data.readNextRow(Str); } // read the lines after you skipped the number of rows you want to skip Data.readNextRow(Str); return Str; } } // End namespace /// Provides `InputIterator` features for iterating over a CSV file. /// /// The iterator parses rows into `std::tuple` values and iterates over the /// file row by row. /// /// \tparam Ts types of values stored in one row of the CSV file /// /// \note The iterator expects each row to consists of fields matching \p Ts. /// /// \note The implementation relies on \c rosa::csv::CSVRow, which in turn /// relies on \c rosa::csv::CSVRowParser, which is implemented only for /// `arithmetic` types -- signed and unsigned integral types and floating-point /// types -- and for \c std::string. Those are the valid values for \p Ts template class CSVIterator { public: /// \defgroup CSVIteratorTypedefs Typedefs of rosa::csv::CSVIterator /// /// Standard `typedef`s for iterators. /// ///@{ typedef std::input_iterator_tag iterator_category; ///< Category of the iterator. typedef std::tuple value_type; ///< Type of values iterated over. typedef std::size_t difference_type; ///< Type to identify distance. typedef std::tuple *pointer; ///< Pointer to the type iterated over. typedef std::tuple &reference; ///< Reference to the type iterated over. ///@} /// Creates a new instance. /// /// \param [in,out] S input stream to iterate over /// \param SkipRows the number of rows you want to skip only once at /// the beginning of the file. /// If you have an header in the file, it is supposed to be /// the first row, and it will be always read out. /// But after this header the next number of Rows will be /// skipped. /// \param HeaderInfo is used to know wheter the file contains an /// header row or not. /// The header has to be in the first row. /// \param Delimiter is the separator between the differnt values of /// the csv file. CSVIterator(std::istream &S, const size_t SkipRows = 0, const HeaderInformation HeaderInfo = HeaderInformation::HasHeader, const char Delimiter = ',') : Str(S.good() ? &S : nullptr), SkipRows(SkipRows), HeaderInfo(HeaderInfo), Delimiter(Delimiter), Row() { Row.SetSkipRows(SkipRows); Row.SetHeaderInfo(HeaderInfo); Row.SetDelimiter(Delimiter); // \c rosa::csv::CSVIterator::Row is initialized empty so the first // incrementation here will read the first row. ++(*this); } /// Creates an empty new instance. CSVIterator(void) noexcept : Str(nullptr), SkipRows(0), HeaderInfo(HeaderInformation::HasHeader), - Delimiter(','), Row() {} + Delimeter(','), Row() {} /// Pre-increment operator. /// /// The implementation reads the next row. If the end of the input stream is /// reached, the operator becomes empty and has no further effect. /// /// \return \p this object after incrementing it. CSVIterator &operator++() { if (Str) { if (!((*Str) >> Row)) { Str = nullptr; } } return *this; } /// Post-increment operator. /// /// The implementation uses the pre-increment operator and returns a copy of /// the original state of \p this object. /// /// \return \p this object before incrementing it. CSVIterator operator++(int) { CSVIterator Tmp(*this); ++(*this); return Tmp; } /// Returns a constant reference to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant reference to the current entry. const std::tuple &operator*(void)const noexcept { return Row.tuple(); } /// Returns a constant pointer to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant pointer to the current entry. const std::tuple *operator->(void)const noexcept { return &Row.tuple(); } /// Tells if \p this object is equal to another one. /// /// Two \c rosa::csv::CSVReader instances are equal if and only if they are /// the same or both are empty. /// /// \param RHS other object to compare to /// /// \return whether \p this object is equal with \p RHS bool operator==(const CSVIterator &RHS) const noexcept { return ((this == &RHS) || ((this->Str == nullptr) && (RHS.Str == nullptr))); } /// Tells if \p this object is not equal to another one. /// /// \see rosa::csv::CSVReader::operator== /// /// \param RHS other object to compare to /// /// \return whether \p this object is not equal with \p RHS. bool operator!=(const CSVIterator &RHS) const noexcept { return !((*this) == RHS); } - /// Set the delimiter used in the csv file. + /// Set the delimeter used in the csv file. /// \param Separator the character which separates the values in the csv file. - inline void setDelimiter(char Separator) noexcept { - this->Delimiter = Separator; + inline void setDelimeter(char Separator) noexcept { + this->Delimeter = Separator; } /// get the delimiter currently set to separate the values in the csv file. /// \return the current character, which is used to separte teh values in the /// csv file. inline char getDelimiter() const noexcept { return this->Delimiter; } private: std::istream *Str; ///< Input stream of a CSV file to iterate over. size_t SkipRows; ///< Number of Rows to skip only once at the beginning of the /// file. HeaderInformation HeaderInfo; ///< does the csv file contain a header or not, /// if this information is ///< not given correclty, the reading of the header would result in ///< in an error. char Delimiter; ///< Delimiter between the entries in the csv file. CSVRow Row; ///< Content of the current row }; } // End namespace csv } // End namespace rosa #endif // ROSA_SUPPORT_CSV_CSVREADER_HPP