possibility to evaluate and extract sub expressions

master
Marc Wäckerlin 20 years ago
parent a37c9c2164
commit 7f6d4c67d0
  1. 30
      mrw/regexp.cpp
  2. 39
      mrw/regexp.hpp
  3. 18
      mrw/regexp_test.cpp

@ -9,28 +9,46 @@
@license LGPL, see file <a href="license.html">COPYING</a>
$Log$
Revision 1.2 2004/12/16 13:09:31 marc
possibility to evaluate and extract sub expressions
Revision 1.1 2004/12/14 20:20:30 marc
initial version
*/
#include <mrw/string.hpp>
#include <mrw/regexp.hpp>
#include <mrw/exception.hpp>
namespace mrw {
RegExp::RegExp(const std::string& pattern, int flags)
throw(std::exception, std::bad_exception) {
if (regcomp(&_regex, pattern.c_str(), flags|nosub))
throw(mrw::invalid_argument(pattern));
/** @brief compile @c pattern and remember whether sub expressions are wanted
    @param pattern POSIX regular expression that is handed to @c regcomp
    @param hassub  @c true if sub expression offsets shall be recorded
                   when matching
    @param flags   compile flags; must not contain @c nosub, which is
                   added here automatically when @c hassub is @c false
    @throw mrw::invalid_argument if @c nosub is set in @c flags or if
           @c regcomp rejects @c pattern */
RegExp::RegExp(const std::string& pattern, bool hassub, int flags)
  throw(std::exception):
    _hassub(hassub) {
  // Mark every slot as "no match", so that operator[] throws when it is
  // called before the first successful match instead of reading
  // indeterminate offsets.
  for (unsigned int i(0); i<MAX_SUB; ++i) _sub[i].rm_so = _sub[i].rm_eo = -1;
  if (flags&nosub) throw mrw::invalid_argument("nosub");
  // Without sub expressions nosub is added for regcomp.
  if (regcomp(&_regex, pattern.c_str(), (_hassub?flags:(flags|nosub))))
    throw mrw::invalid_argument(pattern);
}
/** @brief hand the compiled pattern back to @c regfree */
RegExp::~RegExp() throw()
{
  regfree(&_regex);
}
bool RegExp::operator()(const std::string& text) const throw() {
return !regexec(const_cast<regex_t*>(&_regex), text.c_str(), 0, 0, 0);
/** @brief match @c text against the compiled pattern
    If the object was created with sub expressions, a copy of @c text
    and the offsets of the sub expressions are kept for operator[].
    If the match fails, all stored offsets are invalidated, so that
    operator[] throws instead of applying offsets of an earlier match
    to the new text.
    @param text the text to be matched
    @return
     - true if @c text matches
     - false otherwise */
bool RegExp::operator()(const std::string& text) throw(std::bad_exception) {
  if (!_hassub) return !regexec(&_regex, text.c_str(), 0, 0, 0);
  // operator[] cuts its results out of _text, so match on the stored copy.
  _text = text;
  if (!regexec(&_regex, _text.c_str(), MAX_SUB, _sub, 0)) return true;
  // No match: _sub may still hold the offsets of a previous match, which
  // do not belong to the new _text.
  for (unsigned int i(0); i<MAX_SUB; ++i) _sub[i].rm_so = _sub[i].rm_eo = -1;
  return false;
}
/** @brief return the part of the stored text that belongs to slot @c n
    @param n index into the offsets recorded by operator()
    @return the characters of the stored text between the start and the
            end offset of slot @c n
    @throw mrw::invalid_argument if the object was created without sub
           expressions, if @c n is not below @c MAX_SUB, or if slot
           @c n holds a negative offset */
std::string RegExp::operator[](unsigned int n) const throw(std::exception) {
  if (!_hassub)
    throw mrw::invalid_argument("initialized with no sub expressions");
  // Check the range first, the slot is only accessed for a valid index.
  if (n>=MAX_SUB)
    throw mrw::invalid_argument(mrw::string(n));
  const regmatch_t& slot(_sub[n]);
  if (slot.rm_so<0 || slot.rm_eo<0)
    throw mrw::invalid_argument(mrw::string(n));
  return _text.substr(slot.rm_so, slot.rm_eo-slot.rm_so);
}
}

@ -9,6 +9,9 @@
@license LGPL, see file <a href="license.html">COPYING</a>
$Log$
Revision 1.2 2004/12/16 13:09:31 marc
possibility to evaluate and extract sub expressions
Revision 1.1 2004/12/14 20:20:30 marc
initial version
@ -57,6 +60,9 @@ namespace mrw {
public:
/// The maximum number of sub expressions that are evaluated.
/// It is the size of the offset array that is passed to @c regexec,
/// and operator[] rejects every index that is not below this value.
static const unsigned int MAX_SUB = 99;
/** @brief flags that influence regular expressions
Flag @c newline treats a newline in the text to be compared as
@ -82,16 +88,20 @@ namespace mrw {
The regular expression is compiled on instantiation and can
then be matched several times on different texts.
@param pattern the regular expression pattern, thee the @c man
@param pattern the regular expression pattern, see the @c man
page for POSIX regular expressions (on linux: @c
info&nbsp;7&nbsp;regex)
@param flags special flags, they default to extended|nosub and
@param hassub pass @c true if you want to evaluate sub expressions
@param flags special flags, they default to extended and
should consist of the Flag values combined with @c |;
flag @c nosub must not be used, because it is set
automatically if necessary
@throw std::invalid_argument if pattern compilation fails
@throw mrw::invalid_argument if pattern compilation fails or @c nosub
was set in @c flags
*/
RegExp(const std::string& pattern, int flags = extended)
throw(std::exception, std::bad_exception);
RegExp(const std::string& pattern, bool hassub=false, int flags=extended)
throw(std::exception);
/** @brief cleans up expression from memory */
~RegExp() throw();
@ -102,11 +112,28 @@ namespace mrw {
@return
- true if @c text matches
- false otherwise */
bool operator()(const std::string& text) const throw();
bool operator()(const std::string& text) throw(std::bad_exception);
/** @brief get the n-th sub expression of the last matched text
If the RegExp was instantiated with @c RegExp(pattern, true),
so that sub expressions are evaluated, then you can get the
n-th matched sub expression.
@param n the number of the sub expression to get; 0 stands for
the text matched by the whole pattern
@throw mrw::invalid_argument if this subexpression is not
available
*/
std::string operator[](unsigned int n) const throw(std::exception);
private:
/// Compiled pattern: filled by @c regcomp in the constructor, used by
/// @c regexec in operator() and released by @c regfree in the destructor.
/// NOTE(review): no copy constructor or assignment operator is visible
/// here; if the class is copyable, two objects would free the same
/// pattern - confirm.
regex_t _regex;
/// @c true if the object was created to evaluate sub expressions.
bool _hassub;
/// Offsets written by @c regexec in operator() and read by operator[].
regmatch_t _sub[MAX_SUB];
/// Copy of the text last passed to operator() while @c _hassub is set;
/// operator[] cuts its results out of this copy.
std::string _text;
};
}

@ -9,6 +9,9 @@
@license LGPL, see file <a href="license.html">COPYING</a>
$Log$
Revision 1.2 2004/12/16 13:09:31 marc
possibility to evaluate and extract sub expressions
Revision 1.1 2004/12/14 20:20:30 marc
initial version
@ -16,6 +19,7 @@
*/
#include <mrw/regexp.hpp>
#include <mrw/exception.hpp>
#include <cppunit/TestFixture.h>
#include <cppunit/ui/text/TestRunner.h>
@ -31,9 +35,23 @@ public:
CPPUNIT_ASSERT(!findHalloWelt(""));
CPPUNIT_ASSERT(!findHalloWelt(" Hallo Welt "));
CPPUNIT_ASSERT(findHalloWelt("HalloWelt"));
mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true);
CPPUNIT_ASSERT(extractTest("Guten Tag Frau Zuercher"));
CPPUNIT_ASSERT(extractTest[1]=="Tag" &&
extractTest[2]=="Frau" &&
extractTest[3]=="Zuercher");
}
/// Matches a greeting, checks its three sub expressions and then asks
/// for a fourth one that the pattern does not define.
void ExceptionTest() {
mrw::RegExp extractTest("^Guten (.*) (Herr|Frau) (.*)$", true);
CPPUNIT_ASSERT(extractTest("Guten Tag Herr Schweizer"));
CPPUNIT_ASSERT(extractTest[1]=="Tag" &&
extractTest[2]=="Herr" &&
extractTest[3]=="Schweizer");
// The pattern contains only three groups. The suite registers this test
// with CPPUNIT_TEST_EXCEPTION and expects mrw::invalid_argument here.
std::string s = extractTest[4];
}
// Test suite of the fixture: CheckRegExp is registered as an ordinary
// test, ExceptionTest as a test that is expected to end with
// mrw::invalid_argument.
CPPUNIT_TEST_SUITE(RegExpTest);
CPPUNIT_TEST(CheckRegExp);
CPPUNIT_TEST_EXCEPTION(ExceptionTest, mrw::invalid_argument);
CPPUNIT_TEST_SUITE_END();
};
CPPUNIT_TEST_SUITE_REGISTRATION(RegExpTest);

Loading…
Cancel
Save