diff --git a/mrw/regexp.cpp b/mrw/regexp.cpp
index 3d882c0..3c64c74 100644
--- a/mrw/regexp.cpp
+++ b/mrw/regexp.cpp
@@ -9,28 +9,46 @@
@license LGPL, see file COPYING
$Log$
+ Revision 1.2 2004/12/16 13:09:31 marc
+ possibility to evaluate and extract sub expressions
+
Revision 1.1 2004/12/14 20:20:30 marc
initial version
*/
+#include
#include
#include
namespace mrw {
- RegExp::RegExp(const std::string& pattern, int flags)
- throw(std::exception, std::bad_exception) {
- if (regcomp(&_regex, pattern.c_str(), flags|nosub))
- throw(mrw::invalid_argument(pattern));
+ RegExp::RegExp(const std::string& pattern, bool hassub, int flags)
+ throw(std::exception):
+ _hassub(hassub) {
+ if (flags&nosub) throw mrw::invalid_argument("nosub");
+ if (regcomp(&_regex, pattern.c_str(), (_hassub?flags:(flags|nosub))))
+ throw mrw::invalid_argument(pattern);
}
RegExp::~RegExp() throw() {
regfree(&_regex);
}
- bool RegExp::operator()(const std::string& text) const throw() {
- return !regexec(const_cast(&_regex), text.c_str(), 0, 0, 0);
+ bool RegExp::operator()(const std::string& text) throw(std::bad_exception) {
+ if (_hassub)
+ return !regexec(&_regex, (_text=text).c_str(), MAX_SUB, _sub, 0);
+ else
+ return !regexec(&_regex, text.c_str(), 0, 0, 0);
+ }
+
+ std::string RegExp::operator[](unsigned int n) const throw(std::exception) {
+ if (!_hassub)
+ throw mrw::invalid_argument("initialized with no sub expressions");
+ if (n>=MAX_SUB || _sub[n].rm_so<0 || _sub[n].rm_eo<0)
+ throw mrw::invalid_argument(mrw::string(n));
+ return _text.substr(_sub[n].rm_so, _sub[n].rm_eo-_sub[n].rm_so);
}
+
}
diff --git a/mrw/regexp.hpp b/mrw/regexp.hpp
index 22b79b5..1952d9e 100644
--- a/mrw/regexp.hpp
+++ b/mrw/regexp.hpp
@@ -9,6 +9,9 @@
@license LGPL, see file COPYING
$Log$
+ Revision 1.2 2004/12/16 13:09:31 marc
+ possibility to evaluate and extract sub expressions
+
Revision 1.1 2004/12/14 20:20:30 marc
initial version
@@ -57,6 +60,9 @@ namespace mrw {
public:
+ /// The maximum number of sub expressions that are evaluated.
+ static const unsigned int MAX_SUB = 99;
+
/** @brief flags that influence regular expressions
Flag @c newline treats a newline in the text to be compared as
@@ -82,16 +88,20 @@ namespace mrw {
The regular expression is compiled on instanciation and can
then be matced several times on different texts.
- @param pattern the regular expression pattern, thee the @c man
+ @param pattern the regular expression pattern, see the @c man
page for POSIX regular expressions (on linux: @c
info 7 regex)
- @param flags special flags, they default to extended|nosub and
+ @param hassub pass @c true if you want to evaluate sub expressions
+ @param flags special flags, they default to extended and
should consist of the Flag values combined with @c |
+ (flag @c nosub must not be passed; it is added
+ automatically when @c hassub is @c false)
- @throw std::invalid_argument if pattern compilation fails
+ @throw mrw::invalid_argument if pattern compilation fails or @c nosub
+ was set in @c flags
*/
- RegExp(const std::string& pattern, int flags = extended)
- throw(std::exception, std::bad_exception);
+ RegExp(const std::string& pattern, bool hassub=false, int flags=extended)
+ throw(std::exception);
/** @brief cleans up expression from memory */
~RegExp() throw();
@@ -102,11 +112,28 @@ namespace mrw {
@return
- true if @c text matches
- false otherwise */
- bool operator()(const std::string& text) const throw();
+ bool operator()(const std::string& text) throw(std::bad_exception);
+
+ /** @brief get the n-th sub expression of the last matched text
+
+ If the RegExp was instantiated with @c RegExp(pattern, true),
+ so that sub expressions are evaluated, then you can get the
+ n-th matched sub expression.
+
+ @param n index of the sub expression to get (1 is the first
+ parenthesized group); must be less than @c MAX_SUB
+
+ @throw mrw::invalid_argument if this subexpression is not
+ available
+ */
+ std::string operator[](unsigned int n) const throw(std::exception);
private:
regex_t _regex;
+ bool _hassub;
+ regmatch_t _sub[MAX_SUB];
+ std::string _text;
};
}
diff --git a/mrw/regexp_test.cpp b/mrw/regexp_test.cpp
index 4b027ea..47d0a2f 100644
--- a/mrw/regexp_test.cpp
+++ b/mrw/regexp_test.cpp
@@ -9,6 +9,9 @@
@license LGPL, see file COPYING
$Log$
+ Revision 1.2 2004/12/16 13:09:31 marc
+ possibility to evaluate and extract sub expressions
+
Revision 1.1 2004/12/14 20:20:30 marc
initial version
@@ -16,6 +19,7 @@
*/
#include
+#include
#include
#include
@@ -31,9 +35,23 @@ public:
CPPUNIT_ASSERT(!findHalloWelt(""));
CPPUNIT_ASSERT(!findHalloWelt(" Hallo Welt "));
CPPUNIT_ASSERT(findHalloWelt("HalloWelt"));
+ mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true);
+ CPPUNIT_ASSERT(extractTest("Guten Tag Frau Zuercher"));
+ CPPUNIT_ASSERT(extractTest[1]=="Tag" &&
+ extractTest[2]=="Frau" &&
+ extractTest[3]=="Zuercher");
+ }
+ void ExceptionTest() {
+ mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true);
+ CPPUNIT_ASSERT(extractTest("Guten Tag Herr Schweizer"));
+ CPPUNIT_ASSERT(extractTest[1]=="Tag" &&
+ extractTest[2]=="Herr" &&
+ extractTest[3]=="Schweizer");
+ std::string s = extractTest[4];
}
CPPUNIT_TEST_SUITE(RegExpTest);
CPPUNIT_TEST(CheckRegExp);
+ CPPUNIT_TEST_EXCEPTION(ExceptionTest, mrw::invalid_argument);
CPPUNIT_TEST_SUITE_END();
};
CPPUNIT_TEST_SUITE_REGISTRATION(RegExpTest);