Page MenuHomePhorge

No OneTemporary

Size
17 KB
Referenced Files
None
Subscribers
None
diff --git a/include/rosa/support/csv/CSVReader.hpp b/include/rosa/support/csv/CSVReader.hpp
index 7e29ad8..95c1fda 100755
--- a/include/rosa/support/csv/CSVReader.hpp
+++ b/include/rosa/support/csv/CSVReader.hpp
@@ -1,419 +1,480 @@
//===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===//
//
// The RoSA Framework
//
//===----------------------------------------------------------------------===//
///
/// \file rosa/support/csv/CSVReader.hpp
///
/// \author David Juhasz (david.juhasz@tuwien.ac.at)
///
/// \date 2017-2019
///
/// \brief Facilities to read CSV files.
///
/// \note The implementation is based on the solution at
/// https://stackoverflow.com/a/1120224
///
//===----------------------------------------------------------------------===//
#ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP
#define ROSA_SUPPORT_CSV_CSVREADER_HPP
#include "rosa/support/debug.hpp"
#include <istream>
#include <sstream>
#include <vector>
+#include <algorithm>
namespace rosa {
namespace csv {
/// Anonymous namespace providing implementation details for
/// \c rosa::csv::CSVIterator, consider it private.
namespace {
/// Provides facility for parsing values from one row of CSV data.
///
/// \tparam T type of values to parse from the line
/// \tparam IsSignedInt if \p T is a signed integral type, always use default
/// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use
/// default
/// \tparam IsFloat if \p T is a floating-point type, always use default
/// \tparam IsString if \p T is \c std::string, always use default
///
/// \note Specializations of this `struct` are provided for arithmetic types
/// and \c std::string.
template <typename T, bool IsSignedInt = (std::is_integral<T>::value &&
std::is_signed<T>::value),
// Every flag defaults to a type trait of T, so the flags select a
// specialization from T alone when they are left at their defaults.
bool IsUnsignedInt =
(std::is_integral<T>::value && std::is_unsigned<T>::value),
bool IsFloat = std::is_floating_point<T>::value,
bool IsString = std::is_same<T, std::string>::value>
// Declaration only: no generic definition is given here, so a T is usable
// only if a specialization matching its flag combination exists.
struct CSVRowParser;
/// Specialization for signed integral types.
///
/// \tparam T type of values to parse from the line
///
/// \pre \p T is a signed integral type:\code
/// std::is_integral<T>::value && std::is_signed<T>::value
/// \endcode
template <typename T> struct CSVRowParser<T, true, false, false, false> {
  STATIC_ASSERT((std::is_integral<T>::value && std::is_signed<T>::value),
                "wrong type"); // Sanity check.

  /// Parses a given row of CSV data into a given container.
  ///
  /// \p Data is cleared and then filled with values parsed from \p LineStream.
  /// Entries in the line are to be separated by \p Delimeter. A trailing
  /// delimiter results in one extra entry with value `0` at the end of the
  /// line. No empty entry should be present otherwise: each cell is converted
  /// with \c std::stoll, which throws for a cell that does not start with an
  /// integer.
  ///
  /// \note Parsed values are silently converted to type \p T.
  ///
  /// \param [in,out] LineStream the line to parse
  /// \param [in,out] Data the container to store the parsed values
  /// \param Delimeter the character separating entries, `,` by default
  static void parse(std::stringstream &LineStream, std::vector<T> &Data,
                    char Delimeter = ',') {
    std::string Cell;
    Data.clear();
    while (std::getline(LineStream, Cell, Delimeter)) {
      Data.push_back(static_cast<T>(std::stoll(Cell)));
    }
    // The loop ends only once extraction failed. If the last read left Cell
    // empty, the line ended right after a delimiter (or was empty).
    if (!LineStream && Cell.empty()) {
      // Represent the missing trailing entry with a zero value.
      Data.push_back(0);
    }
  }
};
/// Specialization for unsigned integral types.
///
/// \tparam T type of values to parse from the line
///
/// \pre \p T is an unsigned integral type:\code
/// std::is_integral<T>::value && std::is_unsigned<T>::value
/// \endcode
template <typename T> struct CSVRowParser<T, false, true, false, false> {
  STATIC_ASSERT((std::is_integral<T>::value && std::is_unsigned<T>::value),
                "wrong type"); // Sanity check.

  /// Fills a container with the unsigned integral entries of one CSV row.
  ///
  /// \p Data is emptied first; afterwards every entry of \p LineStream,
  /// split at \p Delimeter, is converted with \c std::stoull and appended.
  /// If the row ends right after a delimiter, one extra entry with value `0`
  /// is appended. Empty entries anywhere else are not supported.
  ///
  /// \note Parsed values are silently converted to type \p T.
  ///
  /// \param [in,out] LineStream the line to parse
  /// \param [in,out] Data the container to store the parsed values
  /// \param Delimeter the character separating entries, `,` by default
  static void parse(std::stringstream &LineStream, std::vector<T> &Data,
                    char Delimeter = ',') {
    Data.clear();
    std::string Entry;
    while (std::getline(LineStream, Entry, Delimeter)) {
      const unsigned long long Parsed = std::stoull(Entry);
      Data.push_back(static_cast<T>(Parsed));
    }
    // Extraction has failed by now; an empty last read means there was no
    // data after the final delimiter.
    const bool EndedOnDelimeter = LineStream.fail() && Entry.empty();
    if (EndedOnDelimeter) {
      Data.push_back(0);
    }
  }
};
/// Specialization for floating-point types.
///
/// \tparam T type of values to parse from the line
///
/// \pre \p T is a floating-point type:\code
/// std::is_floating_point<T>::value
/// \endcode
template <typename T> struct CSVRowParser<T, false, false, true, false> {
  STATIC_ASSERT((std::is_floating_point<T>::value),
                "wrong type"); // Sanity check.

  /// Fills a container with the floating-point entries of one CSV row.
  ///
  /// \p Data is emptied first; afterwards every entry of \p LineStream,
  /// split at \p Delimeter, is converted with \c std::stold and appended.
  /// If the row ends right after a delimiter, one extra entry with value `0`
  /// is appended. Empty entries anywhere else are not supported.
  ///
  /// \note Parsed values are silently converted to type \p T.
  ///
  /// \param [in,out] LineStream the line to parse
  /// \param [in,out] Data the container to store the parsed values
  /// \param Delimeter the character separating entries, `,` by default
  static void parse(std::stringstream &LineStream, std::vector<T> &Data,
                    char Delimeter = ',') {
    Data.clear();
    std::string Entry;
    while (std::getline(LineStream, Entry, Delimeter)) {
      const long double Parsed = std::stold(Entry);
      Data.push_back(static_cast<T>(Parsed));
    }
    // Extraction has failed by now; an empty last read means there was no
    // data after the final delimiter.
    const bool EndedOnDelimeter = LineStream.fail() && Entry.empty();
    if (EndedOnDelimeter) {
      Data.push_back(0);
    }
  }
};
/// Specialization for \c std::string.
///
/// \tparam T type of values to parse from the line
///
/// \pre \p T is \c std::string:\code
/// std::is_same<T, std::string>::value
/// \endcode
template <typename T> struct CSVRowParser<T, false, false, false, true> {
  STATIC_ASSERT((std::is_same<T, std::string>::value),
                "wrong type"); // Sanity check.

  /// Fills a container with the textual entries of one CSV row.
  ///
  /// \p Data is emptied first; afterwards every entry of \p LineStream,
  /// split at \p Delimeter, is appended unchanged. If the row ends right
  /// after a delimiter, one extra empty string is appended.
  ///
  /// \param [in,out] LineStream the line to parse
  /// \param [in,out] Data the container to store the parsed values
  /// \param Delimeter the character separating entries, `,` by default
  static void parse(std::stringstream &LineStream, std::vector<T> &Data,
                    char Delimeter = ',') {
    Data.clear();
    std::string Entry;
    while (std::getline(LineStream, Entry, Delimeter)) {
      Data.push_back(Entry);
    }
    // Extraction has failed by now; an empty last read means there was no
    // data after the final delimiter.
    const bool EndedOnDelimeter = LineStream.fail() && Entry.empty();
    if (EndedOnDelimeter) {
      Data.push_back("");
    }
  }
};
/// Parses and stores entries from a row of CSV data.
///
/// \tparam T type of values to parse and store, i.e. entries in the row
///
/// \note The implementation relies on \c rosa::csv::CSVRowParser, which is
/// implemented only for `arithmetic` types -- signed and unsigned integral and
/// floating-point types -- and for \c std::string. Those are the valid values
/// for \p T.
template <typename T>
class CSVRow {
public:
  /// Gives a constant reference for an entry at a given position of the row.
  ///
  /// \note No bounds checking is performed.
  ///
  /// \param Index the position of the entry
  ///
  /// \return constant reference for the stored entry at position \p Index
  const T &operator[](const size_t Index) const noexcept { return Data[Index]; }

  /// Tells the number of entries stored in the row.
  ///
  /// \return number of stored entries.
  size_t size(void) const noexcept { return Data.size(); }

  /// Parses and stores one row of CSV data.
  ///
  /// The function reads from \p Str up to the configured end-of-line
  /// character and parses the text into \c rosa::csv::CSVRow::Data using
  /// \c rosa::csv::CSVRowParser with the configured delimiter.
  ///
  /// \param [in,out] Str input stream of a CSV file
  void readNextRow(std::istream &Str) {
    std::string Line;
    // Honor the configured line terminator; it is '\n' unless changed.
    std::getline(Str, Line, EndOfLine);
    std::stringstream LineStream(Line);
    CSVRowParser<T>::parse(LineStream, Data, Delimeter);
  }

  /// Tells if a cell looks like a number.
  ///
  /// Accepted is an optionally signed decimal number with an optional
  /// fraction and an optional exponent, possibly surrounded by whitespace
  /// (e.g. `42`, `-1`, `3.14`, `1e-3`). An empty cell counts as numeric as
  /// well, because the numeric parsers store `0` for a trailing empty entry.
  ///
  /// \param input the cell to check
  ///
  /// \return whether \p input is empty or a decimal number
  bool isNumeric(const std::string &input) const {
    if (input.empty()) {
      return true;
    }
    // Plain comparisons instead of the <cctype> functions: those are
    // undefined for negative char values and depend on the locale.
    const auto IsDigit = [](const char C) { return C >= '0' && C <= '9'; };
    const auto IsSpace = [](const char C) {
      return C == ' ' || C == '\t' || C == '\r' || C == '\n' || C == '\v' ||
             C == '\f';
    };
    const auto IsSign = [](const char C) { return C == '+' || C == '-'; };

    size_t Pos = 0;
    size_t End = input.size();
    while (Pos < End && IsSpace(input[Pos])) {
      ++Pos;
    }
    while (End > Pos && IsSpace(input[End - 1])) {
      --End;
    }
    if (Pos < End && IsSign(input[Pos])) {
      ++Pos;
    }
    // Integral and fractional digits; at least one digit is required overall.
    size_t Digits = 0;
    for (; Pos < End && IsDigit(input[Pos]); ++Pos) {
      ++Digits;
    }
    if (Pos < End && input[Pos] == '.') {
      ++Pos;
      for (; Pos < End && IsDigit(input[Pos]); ++Pos) {
        ++Digits;
      }
    }
    if (Digits == 0) {
      return false;
    }
    // Optional exponent, which needs at least one digit of its own.
    if (Pos < End && (input[Pos] == 'e' || input[Pos] == 'E')) {
      ++Pos;
      if (Pos < End && IsSign(input[Pos])) {
        ++Pos;
      }
      size_t ExpDigits = 0;
      for (; Pos < End && IsDigit(input[Pos]); ++Pos) {
        ++ExpDigits;
      }
      if (ExpDigits == 0) {
        return false;
      }
    }
    return Pos == End;
  }

  /// Reads the first row and decides whether it is a header.
  ///
  /// The row is treated as a header if none of its cells is numeric according
  /// to \c rosa::csv::CSVRow::isNumeric. A header is stored separately and
  /// the data entries are left untouched; otherwise the row is parsed as
  /// regular data. In both cases the row no longer counts as the first one.
  ///
  /// \note With this rule, a first row without any numeric cell is always
  /// taken as a header, also when \p T is \c std::string.
  ///
  /// \param [in,out] Str input stream of a CSV file
  void checkIfHeader(std::istream &Str) {
    std::string Line;
    std::getline(Str, Line, EndOfLine);

    std::vector<std::string> FirstRowValues;
    std::stringstream LineStream(Line);
    CSVRowParser<std::string>::parse(LineStream, FirstRowValues, Delimeter);

    const bool HasHeaderLocal =
        std::none_of(FirstRowValues.begin(), FirstRowValues.end(),
                     [this](const std::string &Value) {
                       return isNumeric(Value);
                     });
    if (HasHeaderLocal) {
      Header.swap(FirstRowValues);
    } else {
      // Not a header: parse the very same line again as data of type T.
      std::stringstream DataStream(Line);
      CSVRowParser<T>::parse(DataStream, Data, Delimeter);
    }
    HasHeader = HasHeaderLocal;
    isFirstRow = false;
  }

  /// Tells if a header was detected by \c rosa::csv::CSVRow::checkIfHeader.
  ///
  /// \return whether the first row was stored as a header
  bool isHavingHeader() const { return HasHeader; }

  /// Sets the character separating entries within a row.
  ///
  /// \param Delimeter the new delimiter
  void setDelimeter(char Delimeter) { this->Delimeter = Delimeter; }

  /// \return the character separating entries within a row
  char getDelimeter() const { return this->Delimeter; }

  /// Sets the character terminating a row.
  ///
  /// \param EndOfLine the new end-of-line character
  void setEndOfLine(char EndOfLine) { this->EndOfLine = EndOfLine; }

  /// \return the character terminating a row
  char getEndOfLine() const { return this->EndOfLine; }

  /// Tells if the first row is still to be read.
  ///
  /// \return `true` until \c rosa::csv::CSVRow::checkIfHeader has been called
  bool isThisFirstRow() const { return this->isFirstRow; }

private:
  std::vector<T> Data;             ///< Stores parsed entries
  uint64_t RowNumber = 0;          ///< Current row number
  bool isFirstRow = true;          ///< is this the first row
  bool isFirstRowRead = false;     ///< is the first row read already
  bool HasHeader = false;          ///< Has the current csv file a header
  std::vector<std::string> Header; ///< stores the header entries if available
  char Delimeter = ',';            ///< stores the delimeter between data entries
  char EndOfLine = '\n';           ///< stores the end of line character
};
/// Reads a row of CSV data into \c rosa::csv::CSVRow.
///
/// The next line is read from \p Str by calling
/// \c rosa::csv::CSVRow::readNextRow on \p Data.
///
/// \note A CSV file should contain no empty lines.
///
/// \param [in,out] Str input stream of a CSV file
/// \param [in,out] Data object to read the next line into
///
/// \return \p Str after reading one line from it
template <typename T>
std::istream &operator>>(std::istream &Str, CSVRow<T> &Data) {
  // Any row but the first one is plain data.
  if (!Data.isThisFirstRow()) {
    Data.readNextRow(Str);
    return Str;
  }
  // The first row may be a header. If it is not, checkIfHeader has already
  // stored it as data, so nothing more is to be read here.
  Data.checkIfHeader(Str);
  if (Data.isHavingHeader()) {
    // The header carries no data, move on to the first data row.
    Data.readNextRow(Str);
  }
  return Str;
}
} // End namespace
/// Provides `InputIterator` features for iterating over a CSV file in a
/// flat way.
///
/// The iterator hides rows of the CSV file, and iterates over the entries
/// row-by-row.
///
/// \note A CSV file should contain no empty lines.
///
/// \tparam T type of values to iterate over, i.e. entries in the CSV file.
///
/// \note The implementation relies on \c rosa::csv::CSVRow, which in turn
/// relies on \c rosa::csv::CSVRowParser, which is implemented only for
/// `arithmetic` types -- signed and unsigned integral types and floating-point
/// types -- and for \c std::string. Those are the valid values for \p T.
template <typename T>
class CSVFlatIterator {
public:
  /// \defgroup CSVFlatIteratorTypedefs Typedefs of rosa::csv::CSVFlatIterator
  ///
  /// Standard `typedef`s for iterators.
  ///
  ///@{
  typedef std::input_iterator_tag
      iterator_category;               ///< Category of the iterator.
  typedef T value_type;                ///< Type of values iterated over.
  // NOTE(review): iterator requirements expect a signed difference type
  // (e.g. std::ptrdiff_t); kept as is to leave the public typedef unchanged.
  typedef std::size_t difference_type; ///< Type to identify distance.
  typedef T *pointer;                  ///< Pointer to the type iterated over.
  typedef T &reference;                ///< Reference to the type iterated over.
  ///@}

  /// Creates a new instance and reads the first row.
  ///
  /// \note The first row is read right here, so a delimiter other than the
  /// default needs to be passed to the constructor to affect that row.
  ///
  /// \param [in,out] S input stream to iterate over
  /// \param Delim character separating entries within a row
  /// \param EOL end-of-line character handed over to the row
  CSVFlatIterator(std::istream &S, char Delim = ',', char EOL = '\n')
      : Str(S.good() ? &S : nullptr), Pos(static_cast<size_t>(-1)),
        Delimeter(Delim), EndOfLine(EOL) {
    Row.setDelimeter(Delimeter);
    Row.setEndOfLine(EndOfLine);
    // \c rosa::csv::CSVFlatIterator::Pos is initialized to `-1` so the first
    // incrementation here will set it properly.
    ++(*this);
  }

  /// Creates an empty new instance.
  ///
  /// All members are initialized so that copying an empty instance never
  /// reads indeterminate values.
  CSVFlatIterator(void) noexcept
      : Str(nullptr), Pos(0), Delimeter(','), EndOfLine('\n') {}

  /// Pre-increment operator.
  ///
  /// The implementation moves over the entries in the current row and advances
  /// to the next row when the end of the current row is reached. If the end of
  /// the input stream is reached, the operator becomes empty and has no
  /// further effect.
  ///
  /// \return \p this object after incrementing it.
  CSVFlatIterator &operator++() {
    if (Str) {
      ++Pos;
      if (Pos == Row.size()) {
        if (!((*Str) >> Row)) {
          Str = nullptr;
          --Pos; // Stay on the last entry forever.
        } else {
          Pos = 0;
        }
      }
    }
    return *this;
  }

  /// Post-increment operator.
  ///
  /// The implementation uses the pre-increment operator and returns a copy of
  /// the original state of \p this object.
  ///
  /// \return \p this object before incrementing it.
  CSVFlatIterator operator++(int) {
    CSVFlatIterator Tmp(*this);
    ++(*this);
    return Tmp;
  }

  /// Returns a constant reference to the current entry.
  ///
  /// \note Should not dereference the iterator when it is empty.
  ///
  /// \return constant reference to the current entry.
  const T &operator*(void) const noexcept { return Row[Pos]; }

  /// Returns a constant pointer to the current entry.
  ///
  /// \note Should not dereference the iterator when it is empty.
  ///
  /// \return constant pointer to the current entry.
  const T *operator->(void) const noexcept { return &Row[Pos]; }

  /// Tells if \p this object is equal to another one.
  ///
  /// Two \c rosa::csv::CSVFlatIterator instances are equal if and only if
  /// they are the same or both are empty.
  ///
  /// \param RHS other object to compare to
  ///
  /// \return whether \p this object is equal with \p RHS
  bool operator==(const CSVFlatIterator &RHS) const noexcept {
    return ((this == &RHS) || ((this->Str == nullptr) && (RHS.Str == nullptr)));
  }

  /// Tells if \p this object is not equal to another one.
  ///
  /// \see rosa::csv::CSVFlatIterator::operator==
  ///
  /// \param RHS other object to compare to
  ///
  /// \return whether \p this object is not equal with \p RHS.
  bool operator!=(const CSVFlatIterator &RHS) const noexcept {
    return !((*this) == RHS);
  }

  /// Sets the delimiter used for the rows read from now on.
  ///
  /// The value is handed over to the row as well; storing it only in the
  /// iterator would have no effect on parsing.
  ///
  /// \param Delimter the new delimiter
  void setDelimeter(char Delimter) {
    this->Delimeter = Delimter;
    Row.setDelimeter(Delimter);
  }

  /// \return the delimiter currently set for the iterator
  char getDelimeter() const { return this->Delimeter; }

private:
  std::istream *Str; ///< Input stream of a CSV file to iterate over.
  CSVRow<T> Row;     ///< Content of the current row iterating over.
  size_t Pos;        ///< Current position within the current row.
  char Delimeter;    ///< Delimeter between the entries
  char EndOfLine;    ///< stores the end of line character
};
} // End namespace csv
} // End namespace rosa
#endif // ROSA_SUPPORT_CSV_CSVREADER_HPP

File Metadata

Mime Type
text/x-diff
Expires
Mon, Oct 20, 6:06 PM (21 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
199743
Default Alt Text
(17 KB)

Event Timeline