Difference between revisions of "CPP/TR1/Regex Tokenising"

From ProgrammingExamples
< CPP
Jump to: navigation, search
m (Show tokenising on punctuation as well as whitespace)
m (Vertical spacing/alignment)
 
Line 23: Line 23:
  
 
     std::copy(tokens.begin(), tokens.end(),
 
     std::copy(tokens.begin(), tokens.end(),
        std::ostream_iterator<std::string>(std::cout, "\n"));
+
              std::ostream_iterator<std::string>(std::cout, "\n"));
 
}
 
}
 
</source>
 
</source>

Latest revision as of 11:47, 26 June 2010

#include <string>
#include <algorithm>    // copy
#include <iterator>     // back_inserter, ostream_iterator
#include <iostream>
#include <regex>        // regex, sregex_token_iterator
#include <vector>
 
// Splits a sample sentence into tokens, treating any run of whitespace,
// '-', ',' or '.' as a separator, and prints one token per line to stdout.
int main()
{
    // Submatch index -1 tells the token iterator to yield the text found
    // *between* regex matches (the tokens) instead of the matches themselves.
    static const int submatch_off = -1;

    std::string str = "the\t    quick  brown\n-\n- fox jumped..over,the,lazy,.dog";
    std::vector<std::string> tokens;

    // Separator pattern: one or more of whitespace, ',', '.' or '-'.
    // The '-' is deliberately last in the bracket expression so it is a
    // literal. Written as "[\\s-,.]" it reads as a range starting at the
    // class escape \s, which is invalid (e.g. libstdc++ throws
    // std::regex_error with error_range).
    // The types are taken from namespace std, which is where <regex>
    // declares them; std::tr1 is not provided by that header on current
    // toolchains.
    std::regex re("[\\s,.-]+");

    // Start/end points of the tokens in str; a default-constructed
    // iterator is the end-of-sequence marker.
    std::sregex_token_iterator
        begin(str.begin(), str.end(), re, submatch_off),
        end;

    std::copy(begin, end, std::back_inserter(tokens));

    std::copy(tokens.begin(), tokens.end(),
              std::ostream_iterator<std::string>(std::cout, "\n"));
}