diff --git a/include/rosa/support/csv/CSVReader.hpp b/include/rosa/support/csv/CSVReader.hpp index 45c6e20..f4788f3 100755 --- a/include/rosa/support/csv/CSVReader.hpp +++ b/include/rosa/support/csv/CSVReader.hpp @@ -1,307 +1,485 @@ //===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===// // // The RoSA Framework // //===----------------------------------------------------------------------===// /// /// \file rosa/support/csv/CSVReader.hpp /// /// \authors David Juhasz (david.juhasz@tuwien.ac.at), Edwin Willegger (edwin.willegger@tuwien.ac.at) /// /// \date 2017-2019 /// /// \brief Facitilities to read CSV files. /// /// \note The implementation is based on the solution at /// https://stackoverflow.com/a/1120224 /// //===----------------------------------------------------------------------===// #ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP #define ROSA_SUPPORT_CSV_CSVREADER_HPP #include "rosa/support/debug.hpp" #include "rosa/support/sequence.hpp" #include #include #include #include #include #include namespace rosa { namespace csv { /// Indicating it the CSV file contains any header or not enum class HeaderInformation { HasHeader, HasNoHeader }; /// Anonymous namespace providing implementation details for /// \c rosa::csv::CSVIterator, consider it private. namespace { /// Provides facility for parsing one value from a string. /// /// \tparam T type of value to parse /// \tparam IsSignedInt if \p T is a signed integral type, always use default /// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use /// default /// \tparam IsFloat if \p T is a floating-point type, always use default /// \tparam IsString if \p T is \c std::string, always use default /// /// \note Specializations of this struct are provided for arithmentic types /// and \c std::string. template ::value && std::is_signed::value), bool IsUnsignedInt = (std::is_integral::value && std::is_unsigned::value), bool IsFloat = std::is_floating_point::value, bool IsString = std::is_same::value> struct ValueParser { /// /// /// \param Cell the \c std::string to parse /// /// \return the parsed value /// /// \note The function silently fails if cannot parse \p Cell for type \p T. static T parse(const std::string &Cell) noexcept; }; template struct ValueParser { STATIC_ASSERT((std::is_integral::value && std::is_signed::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return static_cast(std::stoll(Cell)); } }; template struct ValueParser { STATIC_ASSERT((std::is_integral::value && std::is_unsigned::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return static_cast(std::stoull(Cell)); } }; template struct ValueParser { STATIC_ASSERT((std::is_floating_point::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return static_cast(std::stold(Cell)); } }; template struct ValueParser { STATIC_ASSERT((std::is_same::value), "wrong type"); // Sanity check. static T parse(const std::string &Cell) noexcept { return Cell; } }; /// Parses and stores entries from a row of CSV data. /// /// \tparam Ts types of values to parse and store, i.e. entries in the row /// /// \note The implementation relies on \c rosa::csv::CSVRowParser, which is /// implemented only for `arithmetic` types -- signed and unsigned integral /// and floating-point types -- and for \c std::string. Those are the valid /// values for \p Ts. template class CSVRow { private: /// Parses a given row of CSV data into \c CSVRow::Data. /// /// \ CSVRow::Data is filled with values parsed from \p LineStream. Entries /// in the line are to be separated by commas, the character `,`. /// /// \note Parsed values are silently converted to types \p Ts. /// /// \note Parsing silently fails if values do not match \p Ts. /// /// \tparam S0 indices to access tuple elements. /// /// \param [in,out] LineStream the line to parse /// /// \note The last argument is used only to get \p S0, the actual value of /// the parameter is ignored. template void parseRow(std::stringstream &LineStream, Seq) { STATIC_ASSERT(sizeof...(Ts) == sizeof...(S0), "Not matching template arguments."); std::string Cell; // Get fields and parse the values into the proper element of the tuple // one by one in a fold expression. ((std::getline(LineStream, Cell, ','), std::get(Data) = ValueParser::parse(Cell)), ...); + } public: + + /// Constructor with all possible parameters + /// + /// The function creates an instance of an CSVRow object and sets the attributes of the + /// object to the values of the parameters. + /// + /// \param SkipRows the number of data rows to skip, not taking header into account. + /// \param HeaderInfo is the first line of the file a header row or not. + /// \param Delimeter to seperate between the data entries within one row. + CSVRow(const size_t SkipRows = 0, + const HeaderInformation HeaderInfo = HeaderInformation::HasHeader, + const char Delimeter = ',') : + SkipRows(SkipRows), HeaderInfo(HeaderInfo), Delimeter(Delimeter), + RowNumber(0), IsHeaderRead(false) { + } + + + /// Parses and stores one row of CSV data. /// /// The function reads one line from \p Str and parses it into /// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser. /// /// \param [in,out] Str input stream of a CSV file - void readNextRow(std::istream &Str) { + void readNextRow(std::istream &Str) noexcept { std::string Line; + std::getline(Str, Line); - std::stringstream LineStream(Line); - parseRow(LineStream, seq_t()); + + if(Line.size() > 0){ + std::stringstream LineStream(Line); + parseRow(LineStream, seq_t()); + + RowNumber = RowNumber + 1; + } + + } + + /// Read header row and stores it as \p std::string. + /// + /// The function reads the first line of the csv file and stores the entries + /// in a vector. + /// + /// \param [in,out] Str input stream of a CSV file + void readHeader(std::istream &Str) noexcept { + std::string Line; + std::getline(Str, Line); + std::stringstream LineStream(Line); + std::string Value; + + while( getline(LineStream, Value, Delimeter) ){ + Header.push_back(Value); + } + + IsHeaderRead = true; + } + + /// The number of rows to skip once. + /// + /// This function returns the number of data rows to skip + /// at the beginning of the file. + /// + /// \return The number of rows to skip at the beginning of a csv file. + inline size_t SkipNumRows() const noexcept { + return this->SkipRows; + } + + /// The current row number within the csv file. + /// + /// This function returns the current row number. The header + /// row is not counted as a row. + /// + /// \returns the current row number within the csv file. + inline size_t CurRow() const noexcept { + return this->RowNumber; + } + + /// Indiciates if the header was already read. + /// + /// This function returns true, if the header of a csv file which contains + /// a header file is already read. + /// The user has to pass in the attribute HeaderInfo the information if the + /// file has in the first row the header row or not. + /// + /// \return if the header of a file is already read. + inline bool IsHeaderReadDone() const noexcept{ + return this->IsHeaderRead; + } + + + /// Indicates if the file contains a header row in the first row. + /// + /// This function returns if the file contains a header row. + /// The information if the file contains a header row or not, has to be passed by the user. + /// The standard value is HeaderInformation::HasHeader + /// + /// \return if the csv file contains a header row in the first line of the file. + inline HeaderInformation HasFileHeader() const noexcept { + return this->HeaderInfo; } + /// Set the number of rows to skip. + /// + /// This function sets the number of rows to skip at the beginning of + /// the reading of the file. + /// + /// \param SkipRows the number of rows you want to skip at the beginning of the file. + inline void SetSkipRows(const size_t SkipRows) noexcept { + this->SkipRows = SkipRows; + } + + /// Is the first row a header row or not. + /// + /// This function sets the information, if the first row of the csv file + /// is a header line or not. + /// + /// \param HeaderInfo if the first row is a header row or not. + inline void SetHeaderInfo(const HeaderInformation HeaderInfo) noexcept { + this->HeaderInfo = HeaderInfo; + } + + /// Set the seperator between data entries. + /// + /// This funcction sets the separator between the data entries of the csv file. + /// + /// \param Delimeter the character that separates the data values. + inline void SetDelimeter(char Delimeter) { + this->Delimeter = Delimeter; + } + + + + /// Gives a constant references for the \c std::tuple containing the values /// read by \p this object. /// /// \return \c CSVRow::Data const std::tuple &tuple(void) const noexcept { return Data; } private: - std::tuple Data; ///< Stores parsed entries + std::tuple Data; ///< Stores parsed entries + size_t RowNumber; ///< Current row number, counts all row numbers including the header row. + HeaderInformation HeaderInfo; ///< If the file contains a header row or not. + size_t SkipRows; ///< The number of rows to skip at the very beginning of the file. + ///< This number only applies on the number of data rows. + ///< If your file contains a header row and data rows, the skiping + ///< of the header row is not taken into account. + std::vector Header; ///< The content of the header row. + bool IsHeaderRead; ///< Was the header read or not. + char Delimeter; ///< The seperator between the data entries. + }; /// Reads a row of CSV data into \c rosa::csv::CSVRow. /// /// The next line is read from \p Str by calling -/// \c rosa::csv::CSVRow::readNextRow on \p Data. +/// \c rosa::csv::CSVRow::readNextRow on \p Data until all lines are +/// skipped. +/// +/// If the function is called for the first time and the file contains +/// a header than is the header and the first data row read in after the +/// number of rows that the user wants to skip. /// /// \tparam Ts type of values to read from the row /// /// \note The CSV file should contain a line with fields matching \p Ts... /// /// \param [in,out] Str input stream of a CSV file /// \param [in,out] Data object to read the next line into /// /// \return \p Str after reading one line from it template std::istream &operator>>(std::istream &Str, CSVRow &Data) { + + if( Data.HasFileHeader() == HeaderInformation::HasHeader && !Data.IsHeaderReadDone() ) { + Data.readHeader(Str); + } + + while(Data.CurRow() < (Data.SkipNumRows())){ + Data.readNextRow(Str); + } + + //read the lines after you skipped the number of rows you want to skip Data.readNextRow(Str); + return Str; } } // End namespace /// Provides `InputIterator` features for iterating over a CSV file. /// /// The iterator parses rows into `std::tuple` values and iterates over the /// file row by row. /// /// \tparam Ts types of values stored in one row of the CSV file /// /// \note The iterator expects each row to consists of fields matching \p Ts. /// /// \note The implementation relies on \c rosa::csv::CSVRow, which in turn /// relies on \c rosa::csv::CSVRowParser, which is implemented only for /// `arithmetic` types -- signed and unsigned integral types and floating-point /// types -- and for \c std::string. Those are the valid values for \p Ts template class CSVIterator { public: /// \defgroup CSVIteratorTypedefs Typedefs of rosa::csv::CSVIterator /// /// Standard `typedef`s for iterators. /// ///@{ typedef std::input_iterator_tag iterator_category; ///< Category of the iterator. typedef std::tuple value_type; ///< Type of values iterated over. typedef std::size_t difference_type; ///< Type to identify distance. typedef std::tuple *pointer; ///< Pointer to the type iterated over. typedef std::tuple &reference; ///< Reference to the type iterated over. ///@} /// Creates a new instance. /// /// \param [in,out] S input stream to iterate over - CSVIterator(std::istream &S) : Str(S.good() ? &S : nullptr), Row() { + /// \param SkipRows the number of rows you want to skip only once at the beginning of the file. + /// If you have an header in the file, it is supposed to be the first row, and it will be always read out. + /// But after this header the next number of Rows will be skipped. + /// \param HeaderInfo is used to know wheter the file contains an header row or not. + /// The header has to be in the first row. + /// \param Delimeter is the separator between the differnt values of the csv file. + CSVIterator(std::istream &S, const size_t SkipRows = 0, + const HeaderInformation HeaderInfo = HeaderInformation::HasHeader, + const char Delimeter = ',') : Str(S.good() ? &S : nullptr), Row(), + SkipRows(SkipRows), HeaderInfo(HeaderInfo), Delimeter(Delimeter){ + + Row.SetSkipRows(SkipRows); + Row.SetHeaderInfo(HeaderInfo); + Row.SetDelimeter(Delimeter); + // \c rosa::csv::CSVIterator::Row is initialized empty so the first // incrementation here will read the first row. ++(*this); } /// Creates an empty new instance. CSVIterator(void) noexcept : Str(nullptr) {} /// Pre-increment operator. /// /// The implementation reads the next row. If the end of the input stream is /// reached, the operator becomes empty and has no further effect. /// /// \return \p this object after incrementing it. CSVIterator &operator++() { if (Str) { if (!((*Str) >> Row)) { Str = nullptr; } } return *this; } /// Post-increment operator. /// /// The implementation uses the pre-increment operator and returns a copy of /// the original state of \p this object. /// /// \return \p this object before incrementing it. CSVIterator operator++(int) { CSVIterator Tmp(*this); ++(*this); return Tmp; } /// Returns a constant reference to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant reference to the current entry. const std::tuple &operator*(void)const noexcept { return Row.tuple(); } /// Returns a constant pointer to the current entry. /// /// \note Should not dereference the iterator when it is empty. /// /// \return constant pointer to the current entry. const std::tuple *operator->(void)const noexcept { return &Row.tuple(); } /// Tells if \p this object is equal to another one. /// /// Two \c rosa::csv::CSVReader instances are equal if and only if they are /// the same or both are empty. /// /// \param RHS other object to compare to /// /// \return whether \p this object is equal with \p RHS bool operator==(const CSVIterator &RHS) const noexcept { return ((this == &RHS) || ((this->Str == nullptr) && (RHS.Str == nullptr))); } /// Tells if \p this object is not equal to another one. /// /// \see rosa::csv::CSVReader::operator== /// /// \param RHS other object to compare to /// /// \return whether \p this object is not equal with \p RHS. bool operator!=(const CSVIterator &RHS) const noexcept { return !((*this) == RHS); } + /// Set the delimeter used in the csv file. + /// \param Delimeter the character which separates the values in the csv file. + inline void setDelimeter(char Delimeter) noexcept { + this->Delimeter = Delimeter; + } + + /// get the delimeter currently set to separate the values in the csv file. + /// \return the current character, which is used to separte teh values in the csv file. + inline char getDelimeter() const noexcept { + return this->Delimeter; + } + private: - std::istream *Str; ///< Input stream of a CSV file to iterate over. - CSVRow Row; ///< Content of the current row. + std::istream *Str; ///< Input stream of a CSV file to iterate over. + CSVRow Row; ///< Content of the current row + char Delimeter; ///< Delimeter between the entries in the csv file. + HeaderInformation HeaderInfo; ///< does the csv file contain a header or not, if this information is + ///< not given correclty, the reading of the header would result in + ///< in an error. + size_t SkipRows; ///< Number of Rows to skip only once at the beginning of the file. }; } // End namespace csv } // End namespace rosa #endif // ROSA_SUPPORT_CSV_CSVREADER_HPP