Difference between revisions of "CPP/TR1/Regex Tokenising"

From ProgrammingExamples
< CPP
Jump to: navigation, search
(Boost/TR1 Regular Expressions - Tokenising on multiple consecutive delimiters)
 
m (Show tokenising on punctuation as well as whitespace)
Line 1: Line 1:
<source lang="cpp">
+
<source lang="cpp">#include <string>
#include <string>
+
 
#include <algorithm>    // copy
 
#include <algorithm>    // copy
 
#include <iterator>    // back_inserter, ostream_iterator
 
#include <iterator>    // back_inserter, ostream_iterator
Line 12: Line 11:
 
     static const int submatch_off = -1;
 
     static const int submatch_off = -1;
  
     std::string str = "the\t    quick  brown\n\n fox jumped over the lazy dog";
+
     std::string str = "the\t    quick  brown\n-\n- fox jumped..over,the,lazy,.dog";
 
     std::vector<std::string> tokens;
 
     std::vector<std::string> tokens;
     std::tr1::regex re("[\\s]+");
+
     std::tr1::regex re("[\\s-,.]+");
  
 
     //start/end points of tokens in str
 
     //start/end points of tokens in str

Revision as of 11:47, 26 June 2010

#include <string>
#include <algorithm>    // copy
#include <iterator>     // back_inserter, ostream_iterator
#include <iostream>
#include <regex>        // regex, sregex_token_iterator
#include <vector>
 
/**
 * Tokenises a sample string on runs of whitespace and punctuation and prints
 * one token per line to standard output.
 *
 * Consecutive delimiters (any mix of whitespace, ',', '.' and '-') are treated
 * as a single separator because the pattern uses the '+' quantifier.
 */
int main()
{
    // Submatch index -1 tells the token iterator to yield the text *between*
    // regex matches (the tokens) instead of the matches (the delimiters).
    static const int submatch_off = -1;

    const std::string str = "the\t    quick  brown\n-\n- fox jumped..over,the,lazy,.dog";
    std::vector<std::string> tokens;

    // The '-' is placed last in the bracket expression so that it is an
    // unambiguous literal. Writing "[\\s-,.]" puts '-' directly after the
    // class escape \s, which some standard libraries reject as an invalid
    // range and report by throwing std::regex_error.
    const std::regex re("[\\s,.-]+");

    // Start/end points of tokens in str; a default-constructed iterator is
    // the end-of-sequence marker.
    std::sregex_token_iterator
        begin(str.begin(), str.end(), re, submatch_off),
        end;

    std::copy(begin, end, std::back_inserter(tokens));

    std::copy(tokens.begin(), tokens.end(),
        std::ostream_iterator<std::string>(std::cout, "\n"));
}