/** @file

    $Id$

    $Date$
    $Author$

    @copy © Marc Wäckerlin
    @license LGPL, see file <a href="license.html">COPYING</a>

    $Log$
    Revision 1.4  2004/12/20 07:40:36  marc
    documentation improved, new grouping

    Revision 1.3  2004/12/17 16:27:58  marc
    error in documentation syntax

    Revision 1.2  2004/12/16 13:09:31  marc
    possibility to evaluate and extract sub expressions

    Revision 1.1  2004/12/14 20:20:30  marc
    initial version


*/
|
|
|
|
#include <regex.h>
|
|
#include <string>
|
|
#include <stdexcept>
|
|
|
|
namespace mrw {
|
|
|
|
/** @defgroup regexp Regular Expressions
|
|
*/
|
|
//@{
|
|
/** @defgroup regexpregexp Regular Expressions
|
|
|
|
A simple wrapper around the C POSIX regular expression library
|
|
with a C++ Interface.
|
|
|
|
Usage sample:
|
|
@code
|
|
std::ifstream file(filename); // read from a file
|
|
mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"
|
|
for (std::string line;
|
|
mrw::getline(file, line);) // read line by line
|
|
if (findBrackets(line)) // check for regular expression
|
|
found(line); // found, do something
|
|
@endcode
|
|
|
|
*/
|
|
//@{
|
|
|
|
/** @brief a regular expression
|
|
|
|
This class manages a simple regular expression.
|
|
|
|
Usage sample:
|
|
@code
|
|
std::ifstream file(filename); // read from a file
|
|
mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"
|
|
for (std::string line;
|
|
mrw::getline(file, line);) // read line by line
|
|
if (findBrackets(line)) // check for regular expression
|
|
found(line); // found, do something
|
|
@endcode
|
|
|
|
*/
|
|
class RegExp {
|
|
|
|
public:
|
|
|
|
/// The maximum number of sub expressions that are evaluated.
|
|
static const unsigned int MAX_SUB = 99;
|
|
|
|
/** @brief flags that influence regular expressions
|
|
|
|
Flag @c newline treats a newline in the text to be compared as
|
|
dividing the text into multiple lines, so that @c $ can match
|
|
before the newline and @c ^ can match after. Also, don't
|
|
permit @c . to match a newline, and don't permit @c [^...] to
|
|
match a newline.
|
|
|
|
Otherwise, newline acts like any other ordinary character.
|
|
|
|
Flag @c nosub is used internally, don't specify it.
|
|
|
|
*/
|
|
enum Flags {
|
|
extended = REG_EXTENDED, ///< use extended regular expressions
|
|
icase = REG_ICASE, ///< ignore case in match
|
|
nosub = REG_NOSUB, ///< @b internal (report only success/fail)
|
|
newline = REG_NEWLINE ///< treat lines individually
|
|
};
|
|
|
|
/** @brief define a new regular expression
|
|
|
|
The regular expression is compiled on instanciation and can
|
|
then be matced several times on different texts.
|
|
|
|
@param pattern the regular expression pattern, see the @c man
|
|
page for POSIX regular expressions (on linux: @c
|
|
info 7 regex)
|
|
@param hassub pass @c true if you want to evaluate sub expressions
|
|
@param flags special flags, they default to extended and
|
|
should consist of the Flag values combined with @c |
|
|
flag @c nosub must not be used, because it is set
|
|
automatically if necessary
|
|
|
|
@throw mrw::invalid_argument if pattern compilation fails or @c nosub
|
|
was set in @c flags
|
|
*/
|
|
RegExp(const std::string& pattern, bool hassub=false, int flags=extended)
|
|
throw(std::exception);
|
|
|
|
/** @brief cleans up expression from memory */
|
|
~RegExp() throw();
|
|
|
|
/** @brief apply the expression, match a text against the expression
|
|
|
|
@param text a text that is matched against the regular expression
|
|
@return
|
|
- true if @c text matches
|
|
- false otherwise */
|
|
bool operator()(const std::string& text) throw(std::bad_exception);
|
|
|
|
/** @brief get the n-th sub expression of the last matched text
|
|
|
|
If the RegExp was instanciated with @c Regexp(pattern, true),
|
|
so that sub expressions are evaluated, then you can get the
|
|
n-th matched sub expression.
|
|
|
|
@param n the number of sub expression to get, get the n-th sub
|
|
expression
|
|
|
|
@throw mrw::invalid_argument if this subexpression is not
|
|
available
|
|
*/
|
|
std::string operator[](unsigned int n) const throw(std::exception);
|
|
|
|
private:
|
|
|
|
regex_t _regex;
|
|
bool _hassub;
|
|
regmatch_t _sub[MAX_SUB];
|
|
std::string _text;
|
|
|
|
};
|
|
//@}
|
|
//@}
|
|
}
|