Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F386766
CSVReader.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Size
17 KB
Referenced Files
None
Subscribers
None
CSVReader.hpp
View Options
//===-- rosa/support/csv/CSVReader.hpp --------------------------*- C++ -*-===//
//
// The RoSA Framework
//
// Distributed under the terms and conditions of the Boost Software License 1.0.
// See accompanying file LICENSE.
//
// If you did not receive a copy of the license file, see
// http://www.boost.org/LICENSE_1_0.txt.
//
//===----------------------------------------------------------------------===//
///
/// \file rosa/support/csv/CSVReader.hpp
///
/// \authors David Juhasz (david.juhasz@tuwien.ac.at), Edwin Willegger (edwin.willegger@tuwien.ac.at)
///
/// \date 2017-2019
///
/// \brief Facilities to read CSV files.
///
/// \note The implementation is based on the solution at
/// https://stackoverflow.com/a/1120224
///
//===----------------------------------------------------------------------===//
#ifndef ROSA_SUPPORT_CSV_CSVREADER_HPP
#define ROSA_SUPPORT_CSV_CSVREADER_HPP
#include "rosa/support/debug.hpp"
#include "rosa/support/sequence.hpp"

#include <algorithm>
#include <cstddef>
#include <istream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>
namespace
rosa
{
namespace
csv
{
/// Indicates whether the CSV file contains a header row or not.
enum class HeaderInformation {
  HasHeader,  ///< The first line of the file is a header row.
  HasNoHeader ///< The file starts directly with data rows.
};
/// Anonymous namespace providing implementation details for
/// \c rosa::csv::CSVIterator, consider it private.
namespace
{
/// Facility for parsing a single value out of a string.
///
/// The boolean template parameters classify \p T and select the matching
/// partial specialization; callers are expected to leave them at their
/// defaults.
///
/// \tparam T type of value to parse
/// \tparam IsSignedInt if \p T is a signed integral type, always use default
/// \tparam IsUnsignedInt if \p T is an unsigned integral type, always use
/// default
/// \tparam IsFloat if \p T is a floating-point type, always use default
/// \tparam IsString if \p T is \c std::string, always use default
///
/// \note Specializations of this struct are provided for arithmetic types
/// and \c std::string. The primary template only declares \c parse.
template <typename T,
          bool IsSignedInt = (std::is_integral_v<T> && std::is_signed_v<T>),
          bool IsUnsignedInt = (std::is_integral_v<T> && std::is_unsigned_v<T>),
          bool IsFloat = std::is_floating_point_v<T>,
          bool IsString = std::is_same_v<T, std::string>>
struct ValueParser {
  /// Parses one cell of a CSV row into a value of type \p T.
  ///
  /// \param Cell the \c std::string to parse
  ///
  /// \return the parsed value
  ///
  /// \note The function silently fails if cannot parse \p Cell for type \p T.
  static T parse(const std::string &Cell) noexcept;
};
/// Specialization of \c ValueParser for signed integral types.
///
/// \tparam T signed integral type to parse
template <typename T> struct ValueParser<T, true, false, false, false> {
  STATIC_ASSERT((std::is_integral<T>::value && std::is_signed<T>::value),
                "wrong type"); // Sanity check.

  /// Parses \p Cell as a signed integer.
  ///
  /// \param Cell the \c std::string to parse
  ///
  /// \return the result of \c std::stoll converted to \p T, or a
  /// value-initialized \p T if \c std::stoll rejects \p Cell
  ///
  /// \note \c std::stoll reports unparsable and out-of-range input by
  /// throwing. The exception is handled here because letting it escape this
  /// \c noexcept function would terminate the program instead of failing
  /// silently.
  static T parse(const std::string &Cell) noexcept {
    try {
      return static_cast<T>(std::stoll(Cell));
    } catch (const std::logic_error &) {
      // Covers both std::invalid_argument and std::out_of_range.
      return T();
    }
  }
};
/// Specialization of \c ValueParser for unsigned integral types.
///
/// \tparam T unsigned integral type to parse
template <typename T> struct ValueParser<T, false, true, false, false> {
  STATIC_ASSERT((std::is_integral<T>::value && std::is_unsigned<T>::value),
                "wrong type"); // Sanity check.

  /// Parses \p Cell as an unsigned integer.
  ///
  /// \param Cell the \c std::string to parse
  ///
  /// \return the result of \c std::stoull converted to \p T, or a
  /// value-initialized \p T if \c std::stoull rejects \p Cell
  ///
  /// \note \c std::stoull reports unparsable and out-of-range input by
  /// throwing. The exception is handled here because letting it escape this
  /// \c noexcept function would terminate the program instead of failing
  /// silently.
  static T parse(const std::string &Cell) noexcept {
    try {
      return static_cast<T>(std::stoull(Cell));
    } catch (const std::logic_error &) {
      // Covers both std::invalid_argument and std::out_of_range.
      return T();
    }
  }
};
/// Specialization of \c ValueParser for floating-point types.
///
/// \tparam T floating-point type to parse
template <typename T> struct ValueParser<T, false, false, true, false> {
  STATIC_ASSERT((std::is_floating_point<T>::value),
                "wrong type"); // Sanity check.

  /// Parses \p Cell as a floating-point number.
  ///
  /// \param Cell the \c std::string to parse
  ///
  /// \return the result of \c std::stold converted to \p T, or a
  /// value-initialized \p T if \c std::stold rejects \p Cell
  ///
  /// \note \c std::stold reports unparsable and out-of-range input by
  /// throwing. The exception is handled here because letting it escape this
  /// \c noexcept function would terminate the program instead of failing
  /// silently.
  static T parse(const std::string &Cell) noexcept {
    try {
      return static_cast<T>(std::stold(Cell));
    } catch (const std::logic_error &) {
      // Covers both std::invalid_argument and std::out_of_range.
      return T();
    }
  }
};
/// Specialization of \c ValueParser for \c std::string.
///
/// \tparam T the string type, i.e. \c std::string
template <typename T> struct ValueParser<T, false, false, false, true> {
  // Sanity check.
  STATIC_ASSERT((std::is_same_v<T, std::string>), "wrong type");

  /// Hands the cell back unchanged; no conversion is needed for strings.
  ///
  /// \param Cell the \c std::string to parse
  ///
  /// \return a copy of \p Cell
  static T parse(const std::string &Cell) noexcept { return Cell; }
};
/// Parses and stores entries from a row of CSV data.
///
/// \tparam Ts types of values to parse and store, i.e. entries in the row
///
/// \note The implementation relies on \c rosa::csv::CSVRowParser, which is
/// implemented only for `arithmetic` types -- signed and unsigned integral
/// and floating-point types -- and for \c std::string. Those are the valid
/// values for \p Ts.
template
<
typename
...
Ts
>
class
CSVRow
{
private
:
/// Parses a given row of CSV data into \c CSVRow::Data.
///
/// \ CSVRow::Data is filled with values parsed from \p LineStream. Entries
/// in the line are to be separated by commas, the character `,`.
///
/// \note Parsed values are silently converted to types \p Ts.
///
/// \note Parsing silently fails if values do not match \p Ts.
///
/// \tparam S0 indices to access tuple elements.
///
/// \param [in,out] LineStream the line to parse
///
/// \note The last argument is used only to get \p S0, the actual value of
/// the parameter is ignored.
template
<
size_t
...
S0
>
void
parseRow
(
std
::
stringstream
&
LineStream
,
char
Delimiter
,
Seq
<
S0
...
>
)
{
STATIC_ASSERT
(
sizeof
...(
Ts
)
==
sizeof
...(
S0
),
"Not matching template arguments."
);
std
::
string
Cell
;
// Get fields and parse the values into the proper element of the tuple
// one by one in a fold expression.
((
std
::
getline
(
LineStream
,
Cell
,
Delimiter
),
std
::
get
<
S0
>
(
Data
)
=
ValueParser
<
Ts
>::
parse
(
Cell
)),
...);
}
public
:
/// Constructor with all possible parameters
///
/// The function creates an instance of an CSVRow object and sets the attributes of the
/// object to the values of the parameters.
///
/// \param SkipRows the number of data rows to skip, not taking header into account.
/// \param HeaderInfo is the first line of the file a header row or not.
/// \param Delimiter to seperate between the data entries within one row.
CSVRow
(
const
size_t
SkipRows
=
0
,
const
HeaderInformation
HeaderInfo
=
HeaderInformation
::
HasHeader
,
const
char
Delimiter
=
','
)
:
SkipRows
(
SkipRows
),
HeaderInfo
(
HeaderInfo
),
Delimiter
(
Delimiter
),
RowNumber
(
0
),
IsHeaderRead
(
false
)
{
}
/// Parses and stores one row of CSV data.
///
/// The function reads one line from \p Str and parses it into
/// \c rosa::csv::CSVRow::Data using \c rosa::csv::CSVRowParser.
///
/// \param [in,out] Str input stream of a CSV file
void
readNextRow
(
std
::
istream
&
Str
)
noexcept
{
std
::
string
Line
;
std
::
getline
(
Str
,
Line
);
if
(
Line
.
size
()
>
0
){
std
::
stringstream
LineStream
(
Line
);
parseRow
(
LineStream
,
Delimiter
,
seq_t
<
sizeof
...(
Ts
)
>
());
RowNumber
=
RowNumber
+
1
;
}
}
/// Read header row and stores it as \p std::string.
///
/// The function reads the first line of the csv file and stores the entries
/// in a vector.
///
/// \param [in,out] Str input stream of a CSV file
void
readHeader
(
std
::
istream
&
Str
)
noexcept
{
std
::
string
Line
;
std
::
getline
(
Str
,
Line
);
std
::
stringstream
LineStream
(
Line
);
std
::
string
Value
;
while
(
getline
(
LineStream
,
Value
,
Delimiter
)
){
Header
.
push_back
(
Value
);
}
IsHeaderRead
=
true
;
}
/// The number of rows to skip once.
///
/// This function returns the number of data rows to skip
/// at the beginning of the file.
///
/// \return The number of rows to skip at the beginning of a csv file.
inline
size_t
SkipNumRows
()
const
noexcept
{
return
this
->
SkipRows
;
}
/// The current row number within the csv file.
///
/// This function returns the current row number. The header
/// row is not counted as a row.
///
/// \returns the current row number within the csv file.
inline
size_t
CurRow
()
const
noexcept
{
return
this
->
RowNumber
;
}
/// Indiciates if the header was already read.
///
/// This function returns true, if the header of a csv file which contains
/// a header file is already read.
/// The user has to pass in the attribute HeaderInfo the information if the
/// file has in the first row the header row or not.
///
/// \return if the header of a file is already read.
inline
bool
IsHeaderReadDone
()
const
noexcept
{
return
this
->
IsHeaderRead
;
}
/// Indicates if the file contains a header row in the first row.
///
/// This function returns if the file contains a header row.
/// The information if the file contains a header row or not, has to be passed by the user.
/// The standard value is HeaderInformation::HasHeader
///
/// \return if the csv file contains a header row in the first line of the file.
inline
HeaderInformation
HasFileHeader
()
const
noexcept
{
return
this
->
HeaderInfo
;
}
/// Set the number of rows to skip.
///
/// This function sets the number of rows to skip at the beginning of
/// the reading of the file.
///
/// \param SkipRowsBeginning the number of rows you want to skip at the beginning of the file.
inline
void
SetSkipRows
(
const
size_t
SkipRowsBeginning
)
noexcept
{
this
->
SkipRows
=
SkipRowsBeginning
;
}
/// Is the first row a header row or not.
///
/// This function sets the information, if the first row of the csv file
/// is a header line or not.
///
/// \param HeaderInf if the first row is a header row or not.
inline
void
SetHeaderInfo
(
const
HeaderInformation
HeaderInf
)
noexcept
{
this
->
HeaderInfo
=
HeaderInf
;
}
/// Set the seperator between data entries.
///
/// This funcction sets the separator between the data entries of the csv file.
///
/// \param separator the character that separates the data values.
inline
void
SetDelimiter
(
char
separator
)
{
this
->
Delimiter
=
separator
;
}
/// Gives a constant references for the \c std::tuple containing the values
/// read by \p this object.
///
/// \return \c CSVRow::Data
const
std
::
tuple
<
Ts
...
>
&
tuple
(
void
)
const
noexcept
{
return
Data
;
}
private
:
std
::
tuple
<
Ts
...
>
Data
;
///< Stores parsed entries
size_t
SkipRows
;
///< The number of rows to skip at the very beginning of the file.
///< This number only applies on the number of data rows.
///< If your file contains a header row and data rows, the skiping
///< of the header row is not taken into account.
HeaderInformation
HeaderInfo
;
///< If the file contains a header row or not.
char
Delimiter
;
///< The seperator between the data entries.
size_t
RowNumber
;
///< Current row number, counts all row numbers including the header row.
bool
IsHeaderRead
;
///< Was the header read or not.
std
::
vector
<
std
::
string
>
Header
;
///< The content of the header row.
};
/// Reads a row of CSV data into \c rosa::csv::CSVRow.
///
/// On the first call for a file that is declared to have a header, the
/// header line is read first. Then lines are read by calling
/// \c rosa::csv::CSVRow::readNextRow on \p Data until the requested number of
/// rows is skipped, and finally one more line is read as the actual row.
///
/// \tparam Ts type of values to read from the row
///
/// \note The CSV file should contain a line with fields matching \p Ts...
///
/// \note Skipping stops as soon as \p Str fails. Without that check the skip
/// loop would never end for a file with fewer non-empty rows than requested,
/// because \c rosa::csv::CSVRow::readNextRow does not advance the row number
/// when it reads nothing.
///
/// \param [in,out] Str input stream of a CSV file
/// \param [in,out] Data object to read the next line into
///
/// \return \p Str after reading from it
template <typename... Ts>
std::istream &operator>>(std::istream &Str, CSVRow<Ts...> &Data) {
  const bool HeaderPending =
      Data.HasFileHeader() == HeaderInformation::HasHeader &&
      !Data.IsHeaderReadDone();
  if (HeaderPending) {
    Data.readHeader(Str);
  }

  // Skip the requested number of data rows, giving up once the stream cannot
  // deliver any more lines.
  while (Str && Data.CurRow() < Data.SkipNumRows()) {
    Data.readNextRow(Str);
  }

  // Read the line that follows the skipped rows.
  Data.readNextRow(Str);
  return Str;
}
}
// End namespace
/// Provides `InputIterator` features for iterating over a CSV file.
///
/// The iterator parses rows into `std::tuple` values and iterates over the
/// file row by row.
///
/// \tparam Ts types of values stored in one row of the CSV file
///
/// \note The iterator expects each row to consist of fields matching \p Ts.
///
/// \note The implementation relies on \c rosa::csv::CSVRow, which in turn
/// relies on \c ValueParser, which is implemented only for `arithmetic`
/// types -- signed and unsigned integral types and floating-point types --
/// and for \c std::string. Those are the valid values for \p Ts.
template <typename... Ts> class CSVIterator {
public:
  /// \defgroup CSVIteratorTypedefs Typedefs of rosa::csv::CSVIterator
  ///
  /// Standard `typedef`s for iterators.
  ///
  ///@{
  using iterator_category = std::input_iterator_tag; ///< Category of the iterator.
  using value_type = std::tuple<Ts...>; ///< Type of values iterated over.
  // NOTE(review): the standard iterator requirements expect a signed
  // difference type; kept as std::size_t so the public typedef is unchanged.
  using difference_type = std::size_t;   ///< Type to identify distance.
  using pointer = std::tuple<Ts...> *;   ///< Pointer to the type iterated over.
  using reference = std::tuple<Ts...> &; ///< Reference to the type iterated over.
  ///@}

  /// Creates a new instance and reads the first row.
  ///
  /// \param [in,out] S input stream to iterate over
  /// \param SkipRows the number of rows to skip only once at the beginning of
  /// the file. A header, if present, is supposed to be the first row and is
  /// always read; the rows following it are the ones skipped.
  /// \param HeaderInfo whether the file contains a header row or not. The
  /// header has to be in the first row.
  /// \param Delimiter the separator between the different values of the csv
  /// file.
  ///
  /// \note The instance is empty right away if \p S is not in a good state.
  CSVIterator(std::istream &S, const size_t SkipRows = 0,
              const HeaderInformation HeaderInfo = HeaderInformation::HasHeader,
              const char Delimiter = ',')
      : Str(S.good() ? &S : nullptr), SkipRows(SkipRows),
        HeaderInfo(HeaderInfo), Delimiter(Delimiter),
        Row(SkipRows, HeaderInfo, Delimiter) {
    // \c rosa::csv::CSVIterator::Row is initialized empty so the first
    // incrementation here will read the first row.
    ++(*this);
  }

  /// Creates an empty new instance.
  CSVIterator(void) noexcept
      : Str(nullptr), SkipRows(0), HeaderInfo(HeaderInformation::HasHeader),
        Delimiter(','), Row() {}

  /// Pre-increment operator.
  ///
  /// The implementation reads the next row. If the input stream reports a
  /// failure after reading, the iterator becomes empty and further
  /// increments have no effect.
  ///
  /// \return \p this object after incrementing it.
  CSVIterator &operator++() {
    if (Str && !((*Str) >> Row)) {
      Str = nullptr;
    }
    return *this;
  }

  /// Post-increment operator.
  ///
  /// The implementation uses the pre-increment operator and returns a copy of
  /// the original state of \p this object.
  ///
  /// \return \p this object before incrementing it.
  CSVIterator operator++(int) {
    CSVIterator Tmp(*this);
    ++(*this);
    return Tmp;
  }

  /// Returns a constant reference to the current entry.
  ///
  /// \note Should not dereference the iterator when it is empty.
  ///
  /// \return constant reference to the current entry.
  const std::tuple<Ts...> &operator*(void) const noexcept {
    return Row.tuple();
  }

  /// Returns a constant pointer to the current entry.
  ///
  /// \note Should not dereference the iterator when it is empty.
  ///
  /// \return constant pointer to the current entry.
  const std::tuple<Ts...> *operator->(void) const noexcept {
    return &Row.tuple();
  }

  /// Tells if \p this object is equal to another one.
  ///
  /// Two \c rosa::csv::CSVIterator instances are equal if and only if they
  /// are the same or both are empty.
  ///
  /// \param RHS other object to compare to
  ///
  /// \return whether \p this object is equal with \p RHS
  bool operator==(const CSVIterator &RHS) const noexcept {
    return ((this == &RHS) ||
            ((this->Str == nullptr) && (RHS.Str == nullptr)));
  }

  /// Tells if \p this object is not equal to another one.
  ///
  /// \see rosa::csv::CSVIterator::operator==
  ///
  /// \param RHS other object to compare to
  ///
  /// \return whether \p this object is not equal with \p RHS.
  bool operator!=(const CSVIterator &RHS) const noexcept {
    return !((*this) == RHS);
  }

  /// Sets the delimiter used in the csv file.
  ///
  /// The new delimiter is also handed to \c rosa::csv::CSVIterator::Row,
  /// which does the actual splitting; updating only the copy stored in the
  /// iterator would not change how the following rows are parsed.
  ///
  /// \param Separator the character which separates the values in the csv
  /// file.
  inline void setDelimiter(char Separator) noexcept {
    this->Delimiter = Separator;
    Row.SetDelimiter(Separator);
  }

  /// Gets the delimiter currently set to separate the values in the csv file.
  ///
  /// \return the character which is used to separate the values in the csv
  /// file.
  inline char getDelimiter() const noexcept { return this->Delimiter; }

private:
  std::istream *Str; ///< Input stream of a CSV file to iterate over; null
                     ///< when the iterator is empty.

  size_t SkipRows; ///< Number of rows to skip only once at the beginning of
                   ///< the file, as passed to the constructor.

  HeaderInformation HeaderInfo; ///< Whether the csv file contains a header or
                                ///< not, as passed to the constructor.

  char Delimiter; ///< Delimiter between the entries in the csv file.

  CSVRow<Ts...> Row; ///< Content of the current row.
};
}
// End namespace csv
}
// End namespace rosa
#endif
// ROSA_SUPPORT_CSV_CSVREADER_HPP
File Metadata
Details
Attached
Mime Type
text/x-c++
Expires
Fri, Jul 4, 7:27 AM (1 h, 1 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
157522
Default Alt Text
CSVReader.hpp (17 KB)
Attached To
Mode
R20 SoC_Rosa_repo
Attached
Detach File
Event Timeline
Log In to Comment