Difference between revisions of "CPP/TR1/Regex Tokenising"
From ProgrammingExamples
< CPP
(Boost/TR1 Regular Expressions - Tokenising on multiple consecutive delimiters) |
m (Show tokenising on punctuation as well as whitespace) |
||
Line 1: | Line 1: | ||
− | <source lang="cpp"> | + | <source lang="cpp">#include <string> |
− | #include <string> | + | |
#include <algorithm> // copy | #include <algorithm> // copy | ||
#include <iterator> // back_inserter, ostream_iterator | #include <iterator> // back_inserter, ostream_iterator | ||
Line 12: | Line 11: | ||
static const int submatch_off = -1; | static const int submatch_off = -1; | ||
− | std::string str = "the\t quick brown\n\n fox jumped over the lazy dog"; | + | std::string str = "the\t quick brown\n-\n- fox jumped..over,the,lazy,.dog"; |
std::vector<std::string> tokens; | std::vector<std::string> tokens; | ||
− | std::tr1::regex re("[\\s]+"); | + | std::tr1::regex re("[\\s-,.]+"); |
//start/end points of tokens in str | //start/end points of tokens in str |
Revision as of 11:47, 26 June 2010
#include <string> #include <algorithm> // copy #include <iterator> // back_inserter, ostream_iterator #include <iostream> #include <regex> // regex, sregex_token_iterator #include <vector> int main() { //flag to switch off submatching static const int submatch_off = -1; std::string str = "the\t quick brown\n-\n- fox jumped..over,the,lazy,.dog"; std::vector<std::string> tokens; std::tr1::regex re("[\\s-,.]+"); //start/end points of tokens in str std::tr1::sregex_token_iterator begin(str.begin(), str.end(), re, submatch_off), end; std::copy(begin, end, std::back_inserter(tokens)); std::copy(tokens.begin(), tokens.end(), std::ostream_iterator<std::string>(std::cout, "\n")); }