possibility to evaluate and extract sub expressions

master
Marc Wäckerlin 20 years ago
parent a37c9c2164
commit 7f6d4c67d0
  1. 30
      mrw/regexp.cpp
  2. 39
      mrw/regexp.hpp
  3. 18
      mrw/regexp_test.cpp

@ -9,28 +9,46 @@
@license LGPL, see file <a href="license.html">COPYING</a> @license LGPL, see file <a href="license.html">COPYING</a>
$Log$ $Log$
Revision 1.2 2004/12/16 13:09:31 marc
possibility to evaluate and extract sub expressions
Revision 1.1 2004/12/14 20:20:30 marc Revision 1.1 2004/12/14 20:20:30 marc
initial version initial version
*/ */
#include <mrw/string.hpp>
#include <mrw/regexp.hpp> #include <mrw/regexp.hpp>
#include <mrw/exception.hpp> #include <mrw/exception.hpp>
namespace mrw { namespace mrw {
RegExp::RegExp(const std::string& pattern, int flags) RegExp::RegExp(const std::string& pattern, bool hassub, int flags)
throw(std::exception, std::bad_exception) { throw(std::exception):
if (regcomp(&_regex, pattern.c_str(), flags|nosub)) _hassub(hassub) {
throw(mrw::invalid_argument(pattern)); if (flags&nosub) throw mrw::invalid_argument("nosub");
if (regcomp(&_regex, pattern.c_str(), (_hassub?flags:(flags|nosub))))
throw mrw::invalid_argument(pattern);
} }
RegExp::~RegExp() throw() { RegExp::~RegExp() throw() {
regfree(&_regex); regfree(&_regex);
} }
bool RegExp::operator()(const std::string& text) const throw() { bool RegExp::operator()(const std::string& text) throw(std::bad_exception) {
return !regexec(const_cast<regex_t*>(&_regex), text.c_str(), 0, 0, 0); if (_hassub)
return !regexec(&_regex, (_text=text).c_str(), MAX_SUB, _sub, 0);
else
return !regexec(&_regex, text.c_str(), 0, 0, 0);
}
std::string RegExp::operator[](unsigned int n) const throw(std::exception) {
if (!_hassub)
throw mrw::invalid_argument("initialized with no sub expressions");
if (n>=MAX_SUB || _sub[n].rm_so<0 || _sub[n].rm_eo<0)
throw mrw::invalid_argument(mrw::string(n));
return _text.substr(_sub[n].rm_so, _sub[n].rm_eo-_sub[n].rm_so);
} }
} }

@ -9,6 +9,9 @@
@license LGPL, see file <a href="license.html">COPYING</a> @license LGPL, see file <a href="license.html">COPYING</a>
$Log$ $Log$
Revision 1.2 2004/12/16 13:09:31 marc
possibility to evaluate and extract sub expressions
Revision 1.1 2004/12/14 20:20:30 marc Revision 1.1 2004/12/14 20:20:30 marc
initial version initial version
@ -57,6 +60,9 @@ namespace mrw {
public: public:
/// The maximum number of sub expressions that are evaluated.
static const unsigned int MAX_SUB = 99;
/** @brief flags that influence regular expressions /** @brief flags that influence regular expressions
Flag @c newline treats a newline in the text to be compared as Flag @c newline treats a newline in the text to be compared as
@ -82,16 +88,20 @@ namespace mrw {
The regular expression is compiled on instantiation and can The regular expression is compiled on instantiation and can
then be matched several times on different texts. then be matched several times on different texts.
@param pattern the regular expression pattern, thee the @c man @param pattern the regular expression pattern, see the @c man
page for POSIX regular expressions (on linux: @c page for POSIX regular expressions (on linux: @c
info&nbsp;7&nbsp;regex) info&nbsp;7&nbsp;regex)
@param flags special flags, they default to extended|nosub and @param hassub pass @c true if you want to evaluate sub expressions
@param flags special flags, they default to extended and
should consist of the Flag values combined with @c | should consist of the Flag values combined with @c |
(the flag @c nosub must not be used, because it is set
automatically when @c hassub is @c false)
@throw std::invalid_argument if pattern compilation fails @throw mrw::invalid_argument if pattern compilation fails or @c nosub
was set in @c flags
*/ */
RegExp(const std::string& pattern, int flags = extended) RegExp(const std::string& pattern, bool hassub=false, int flags=extended)
throw(std::exception, std::bad_exception); throw(std::exception);
/** @brief cleans up expression from memory */ /** @brief cleans up expression from memory */
~RegExp() throw(); ~RegExp() throw();
@ -102,11 +112,28 @@ namespace mrw {
@return @return
- true if @c text matches - true if @c text matches
- false otherwise */ - false otherwise */
bool operator()(const std::string& text) const throw(); bool operator()(const std::string& text) throw(std::bad_exception);
/** @brief get the n-th sub expression of the last matched text
If the RegExp was instantiated with @c RegExp(pattern, true),
so that sub expressions are evaluated, then you can get the
n-th matched sub expression.
@param n the index of the sub expression to get; as with POSIX
@c regexec, 0 is the whole match and 1 is the first
parenthesized sub expression
@throw mrw::invalid_argument if this subexpression is not
available
*/
std::string operator[](unsigned int n) const throw(std::exception);
private: private:
regex_t _regex; regex_t _regex;
bool _hassub;
regmatch_t _sub[MAX_SUB];
std::string _text;
}; };
} }

@ -9,6 +9,9 @@
@license LGPL, see file <a href="license.html">COPYING</a> @license LGPL, see file <a href="license.html">COPYING</a>
$Log$ $Log$
Revision 1.2 2004/12/16 13:09:31 marc
possibility to evaluate and extract sub expressions
Revision 1.1 2004/12/14 20:20:30 marc Revision 1.1 2004/12/14 20:20:30 marc
initial version initial version
@ -16,6 +19,7 @@
*/ */
#include <mrw/regexp.hpp> #include <mrw/regexp.hpp>
#include <mrw/exception.hpp>
#include <cppunit/TestFixture.h> #include <cppunit/TestFixture.h>
#include <cppunit/ui/text/TestRunner.h> #include <cppunit/ui/text/TestRunner.h>
@ -31,9 +35,23 @@ public:
CPPUNIT_ASSERT(!findHalloWelt("")); CPPUNIT_ASSERT(!findHalloWelt(""));
CPPUNIT_ASSERT(!findHalloWelt(" Hallo Welt ")); CPPUNIT_ASSERT(!findHalloWelt(" Hallo Welt "));
CPPUNIT_ASSERT(findHalloWelt("HalloWelt")); CPPUNIT_ASSERT(findHalloWelt("HalloWelt"));
mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true);
CPPUNIT_ASSERT(extractTest("Guten Tag Frau Zuercher"));
CPPUNIT_ASSERT(extractTest[1]=="Tag" &&
extractTest[2]=="Frau" &&
extractTest[3]=="Zuercher");
}
void ExceptionTest() {
mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true);
CPPUNIT_ASSERT(extractTest("Guten Tag Herr Schweizer"));
CPPUNIT_ASSERT(extractTest[1]=="Tag" &&
extractTest[2]=="Herr" &&
extractTest[3]=="Schweizer");
std::string s = extractTest[4];
} }
CPPUNIT_TEST_SUITE(RegExpTest); CPPUNIT_TEST_SUITE(RegExpTest);
CPPUNIT_TEST(CheckRegExp); CPPUNIT_TEST(CheckRegExp);
CPPUNIT_TEST_EXCEPTION(ExceptionTest, mrw::invalid_argument);
CPPUNIT_TEST_SUITE_END(); CPPUNIT_TEST_SUITE_END();
}; };
CPPUNIT_TEST_SUITE_REGISTRATION(RegExpTest); CPPUNIT_TEST_SUITE_REGISTRATION(RegExpTest);

Loading…
Cancel
Save