From 7f6d4c67d0abfa1b112526824d5bd5f68e21f6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20W=C3=A4ckerlin?= Date: Thu, 16 Dec 2004 13:09:31 +0000 Subject: [PATCH] possibility to evaluate and extract sub expressions --- mrw/regexp.cpp | 30 ++++++++++++++++++++++++------ mrw/regexp.hpp | 39 +++++++++++++++++++++++++++++++++------ mrw/regexp_test.cpp | 18 ++++++++++++++++++ 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/mrw/regexp.cpp b/mrw/regexp.cpp index 3d882c0..3c64c74 100644 --- a/mrw/regexp.cpp +++ b/mrw/regexp.cpp @@ -9,28 +9,46 @@ @license LGPL, see file COPYING $Log$ + Revision 1.2 2004/12/16 13:09:31 marc + possibility to evaluate and extract sub expressions + Revision 1.1 2004/12/14 20:20:30 marc initial version */ +#include #include #include namespace mrw { - RegExp::RegExp(const std::string& pattern, int flags) - throw(std::exception, std::bad_exception) { - if (regcomp(&_regex, pattern.c_str(), flags|nosub)) - throw(mrw::invalid_argument(pattern)); + RegExp::RegExp(const std::string& pattern, bool hassub, int flags) + throw(std::exception): + _hassub(hassub) { + if (flags&nosub) throw mrw::invalid_argument("nosub"); + if (regcomp(&_regex, pattern.c_str(), (_hassub?flags:(flags|nosub)))) + throw mrw::invalid_argument(pattern); } RegExp::~RegExp() throw() { regfree(&_regex); } - bool RegExp::operator()(const std::string& text) const throw() { - return !regexec(const_cast<regex_t*>(&_regex), text.c_str(), 0, 0, 0); + bool RegExp::operator()(const std::string& text) throw(std::bad_exception) { + if (_hassub) + return !regexec(&_regex, (_text=text).c_str(), MAX_SUB, _sub, 0); + else + return !regexec(&_regex, text.c_str(), 0, 0, 0); + } + + std::string RegExp::operator[](unsigned int n) const throw(std::exception) { + if (!_hassub) + throw mrw::invalid_argument("initialized with no sub expressions"); + if (n>=MAX_SUB || _sub[n].rm_so<0 || _sub[n].rm_eo<0) + throw mrw::invalid_argument(mrw::string(n)); + return _text.substr(_sub[n].rm_so, 
_sub[n].rm_eo-_sub[n].rm_so); } + } diff --git a/mrw/regexp.hpp b/mrw/regexp.hpp index 22b79b5..1952d9e 100644 --- a/mrw/regexp.hpp +++ b/mrw/regexp.hpp @@ -9,6 +9,9 @@ @license LGPL, see file COPYING $Log$ + Revision 1.2 2004/12/16 13:09:31 marc + possibility to evaluate and extract sub expressions + Revision 1.1 2004/12/14 20:20:30 marc initial version @@ -57,6 +60,9 @@ namespace mrw { public: + /// The maximum number of sub expressions that are evaluated. + static const unsigned int MAX_SUB = 99; + /** @brief flags that influence regular expressions Flag @c newline treats a newline in the text to be compared as @@ -82,16 +88,20 @@ namespace mrw { The regular expression is compiled on instanciation and can then be matced several times on different texts. - @param pattern the regular expression pattern, thee the @c man + @param pattern the regular expression pattern, see the @c man page for POSIX regular expressions (on linux: @c info 7 regex) - @param flags special flags, they default to extended|nosub and + @param hassub pass @c true if you want to evaluate sub expressions + @param flags special flags, they default to extended and should consist of the Flag values combined with @c | + flag @c nosub must not be used, because it is set + automatically if necessary - @throw std::invalid_argument if pattern compilation fails + @throw mrw::invalid_argument if pattern compilation fails or @c nosub + was set in @c flags */ - RegExp(const std::string& pattern, int flags = extended) - throw(std::exception, std::bad_exception); + RegExp(const std::string& pattern, bool hassub=false, int flags=extended) + throw(std::exception); /** @brief cleans up expression from memory */ ~RegExp() throw(); @@ -102,11 +112,28 @@ namespace mrw { @return - true if @c text matches - false otherwise */ - bool operator()(const std::string& text) const throw(); + bool operator()(const std::string& text) throw(std::bad_exception); + + /** @brief get the n-th sub expression of the last matched 
text + + If the RegExp was instanciated with @c Regexp(pattern, true), + so that sub expressions are evaluated, then you can get the + n-th matched sub expression. + + @param n the number of sub expression to get, get the n-th sub + expression + + @throw mrw::invalid_argument if this subexpression is not + available + */ + std::string operator[](unsigned int n) const throw(std::exception); private: regex_t _regex; + bool _hassub; + regmatch_t _sub[MAX_SUB]; + std::string _text; }; } diff --git a/mrw/regexp_test.cpp b/mrw/regexp_test.cpp index 4b027ea..47d0a2f 100644 --- a/mrw/regexp_test.cpp +++ b/mrw/regexp_test.cpp @@ -9,6 +9,9 @@ @license LGPL, see file COPYING $Log$ + Revision 1.2 2004/12/16 13:09:31 marc + possibility to evaluate and extract sub expressions + Revision 1.1 2004/12/14 20:20:30 marc initial version @@ -16,6 +19,7 @@ */ #include +#include #include #include @@ -31,9 +35,23 @@ public: CPPUNIT_ASSERT(!findHalloWelt("")); CPPUNIT_ASSERT(!findHalloWelt(" Hallo Welt ")); CPPUNIT_ASSERT(findHalloWelt("HalloWelt")); + mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true); + CPPUNIT_ASSERT(extractTest("Guten Tag Frau Zuercher")); + CPPUNIT_ASSERT(extractTest[1]=="Tag" && + extractTest[2]=="Frau" && + extractTest[3]=="Zuercher"); + } + void ExceptionTest() { + mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true); + CPPUNIT_ASSERT(extractTest("Guten Tag Herr Schweizer")); + CPPUNIT_ASSERT(extractTest[1]=="Tag" && + extractTest[2]=="Herr" && + extractTest[3]=="Schweizer"); + std::string s = extractTest[4]; } CPPUNIT_TEST_SUITE(RegExpTest); CPPUNIT_TEST(CheckRegExp); + CPPUNIT_TEST_EXCEPTION(ExceptionTest, mrw::invalid_argument); CPPUNIT_TEST_SUITE_END(); }; CPPUNIT_TEST_SUITE_REGISTRATION(RegExpTest);