mrw-cxx/mrw/regexp.hpp

/** @file

    $Id$

    $Date$
    $Author$

    @copy &copy; Marc W&auml;ckerlin
    @license LGPL, see file <a href="license.html">COPYING</a>

    $Log$
    Revision 1.2  2004/12/16 13:09:31  marc
    possibility to evaluate and extract sub expressions

    Revision 1.1  2004/12/14 20:20:30  marc
    initial version


*/

#include <regex.h>
#include <string>
#include <stdexcept>

namespace mrw {

  /** @defgroup regexp Regular Expressions

      A simple wrapper around the C POSIX regular expression library
      with a C++ Interface.

      Usage sample:
      @code
        std::ifstream file(filename);           // read from a file
        mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"
        for (std::string line;
             mrw::getline(file, line);)         // read line by line
          if (findBrackets(line))               // check for regular expression
            found(line);                        // found, do something
      @endcode

  */

  /** @brief a regular expression

      This class manages a simple POSIX regular expression. The
      pattern is compiled once on construction and can then be
      matched against any number of texts.

      Instances cannot be copied: the object holds a compiled
      @c regex_t, and a member-wise copy would leave two objects
      sharing the same compiled state while each of them cleans it up
      on destruction.

      Usage sample:
      @code
        std::ifstream file(filename);           // read from a file
        mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"
        for (std::string line;
             mrw::getline(file, line);)         // read line by line
          if (findBrackets(line))               // check for regular expression
            found(line);                        // found, do something
      @endcode

  */
  class RegExp {
  
  public:

    /// The maximum number of sub expressions that are evaluated.
    static const unsigned int MAX_SUB = 99;

    /** @brief flags that influence regular expressions

        The values map directly to the @c REG_* constants of
        @c <regex.h>, so they can be combined with @c | and passed on
        unchanged.

        Flag @c newline treats a newline in the text to be compared as
        dividing the text into multiple lines, so that @c $ can match
        before the newline and @c ^ can match after.  Also, don't
        permit @c . to match a newline, and don't permit @c [^...] to
        match a newline.

        Otherwise, newline acts like any other ordinary character.

        Flag @c nosub is used internally, don't specify it.

    */
    enum Flags {
      extended = REG_EXTENDED,  ///< use extended regular expressions
      icase    = REG_ICASE,     ///< ignore case in match
      nosub    = REG_NOSUB,     ///< @b internal (report only success/fail)
      newline  = REG_NEWLINE    ///< treat lines individually
    };

    /** @brief define a new regular expression

        The regular expression is compiled on instantiation and can
        then be matched several times on different texts.

        @param pattern the regular expression pattern, see the @c man
                       page for POSIX regular expressions (on linux: @c
                       man&nbsp;7&nbsp;regex)
        @param hassub  pass @c true if you want to evaluate sub expressions
        @param flags   special flags, they default to extended and
                       should consist of the Flag values combined with @c |
                       flag @c nosub must not be used, because it is set
                       automatically if necessary

        @throw mrw::invalid_argument if pattern compilation fails or @c nosub
                                     was set in @c flags

        @note No dynamic exception specification is declared here:
              these were removed from the language in C++17. The
              out-of-line definition must not declare one either.
    */
    RegExp(const std::string& pattern, bool hassub=false, int flags=extended);

    /** @brief cleans up expression from memory

        The empty @c throw() is kept on purpose: it is still accepted
        by C++17 and documents that destruction never throws.
    */
    ~RegExp() throw();

    /** @brief apply the expression, match a text against the expression

        Not @c const, because the object keeps state about the last
        match for later use by operator[].

        @param text a text that is matched against the regular expression
        @return
          - true if @c text matches
          - false otherwise */
    bool operator()(const std::string& text);

    /** @brief get the n-th sub expression of the last matched text

        If the RegExp was instantiated with @c RegExp(pattern, true),
        so that sub expressions are evaluated, then you can get the
        n-th matched sub expression.

        @param n the number of sub expression to get, get the n-th sub
                 expression

        @return the text of the n-th sub expression

        @throw mrw::invalid_argument if this subexpression is not
                                     available
    */
    std::string operator[](unsigned int n) const;
  
  private:

    /** @brief copying is disabled (declared, intentionally not defined)

        A copy would duplicate the @c regex_t member-wise, so that two
        objects refer to the same compiled expression and both clean
        it up.
    */
    RegExp(const RegExp&);

    /** @brief assignment is disabled (declared, intentionally not defined)

        See the copy constructor for the reason.
    */
    RegExp& operator=(const RegExp&);
  
    regex_t _regex;           ///< the compiled regular expression
    bool _hassub;             ///< @c true if sub expressions are evaluated
    regmatch_t _sub[MAX_SUB]; ///< storage for up to MAX_SUB sub expression matches
    std::string _text;        ///< text storage; presumably the last matched text, used by operator[] (definition not visible here)
  
  };
}
initial version 2004-12-14 20:20:30 +00:00			`/** @file`

			$Id$

			$Date$
			$Author$

			`@copy © Marc Wäckerlin`
			`@license LGPL, see file <a href="license.html">COPYING</a>`

			$Log$
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`Revision 1.2 2004/12/16 13:09:31 marc`
			`possibility to evaluate and extract sub expressions`

initial version 2004-12-14 20:20:30 +00:00			`Revision 1.1 2004/12/14 20:20:30 marc`
			`initial version`


			`*/`

			`#include <regex.h>`
			`#include <string>`
			`#include <stdexcept>`

			`namespace mrw {`

			`/** @defgroup regexp Regular Expressions`

			`A simple wrapper around the C POSIX regular expression library`
			`with a C++ Interface.`

			`Usage sample:`
			`@code`
			`std::ifstream file(filename); // read from a file`
			`mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"`
			`for (std::string line;`
			`mrw::getline(file, line);) // read line by line`
			`if (findBrackets(line)) // check for regular expression`
			`found(line); // found, do something`
			`@endcode`

			`*/`

			`/** @brief a regular expression`

			`This class manages a simple regular expression.`

			`Usage sample:`
			`@code`
			`std::ifstream file(filename); // read from a file`
			`mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"`
			`for (std::string line;`
			`mrw::getline(file, line);) // read line by line`
			`if (findBrackets(line)) // check for regular expression`
			`found(line); // found, do something`
			`@endcode`

			`*/`
			`class RegExp {`

			`public:`

possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`/// The maximum number of sub expressions that are evaluated.`
			`static const unsigned int MAX_SUB = 99;`

initial version 2004-12-14 20:20:30 +00:00			`/** @brief flags that influence regular expressions`

			`Flag @c newline treats a newline in the text to be compared as`
			`dividing the text into multiple lines, so that @c $ can match`
			`before the newline and @c ^ can match after. Also, don't`
			`permit @c . to match a newline, and don't permit @c [^...] to`
			`match a newline.`

			`Otherwise, newline acts like any other ordinary character.`

			`Flag @c nosub is used internally, don't specify it.`

			`*/`
			`enum Flags {`
			`extended = REG_EXTENDED, ///< use extended regular expressions`
			`icase = REG_ICASE, ///< ignore case in match`
			`nosub = REG_NOSUB, ///< @b internal (report only success/fail)`
			`newline = REG_NEWLINE ///< treat lines individually`
			`};`

			`/** @brief define a new regular expression`

			`The regular expression is compiled on instantiation and can`
			`then be matched several times on different texts.`

possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`@param pattern the regular expression pattern, see the @c man`
initial version 2004-12-14 20:20:30 +00:00			`page for POSIX regular expressions (on linux: @c`
			`info 7 regex)`
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`@param hassub pass @c true if you want to evaluate sub expressions`
			`@param flags special flags, they default to extended and`
initial version 2004-12-14 20:20:30 +00:00			`should consist of the Flag values combined with @c \|`
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`flag @c nosub must not be used, because it is set`
			`automatically if necessary`
initial version 2004-12-14 20:20:30 +00:00
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`@throw mrw::invalid_argument if pattern compilation fails or @c nosub`
			`was set in @c flags`
initial version 2004-12-14 20:20:30 +00:00			`*/`
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`RegExp(const std::string& pattern, bool hassub=false, int flags=extended)`
			`throw(std::exception);`
initial version 2004-12-14 20:20:30 +00:00
			`/** @brief cleans up expression from memory */`
			`~RegExp() throw();`

			`/** @brief apply the expression, match a text against the expression`

			`@param text a text that is matched against the regular expression`
			`@return`
			`- true if @c text matches`
			`- false otherwise */`
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`bool operator()(const std::string& text) throw(std::bad_exception);`

			`/** @brief get the n-th sub expression of the last matched text`

			`If the RegExp was instantiated with @c RegExp(pattern, true),`
			`so that sub expressions are evaluated, then you can get the`
			`n-th matched sub expression.`

			`@param n the number of sub expression to get, get the n-th sub`
			`expression`

			`@throw mrw::invalid_argument if this subexpression is not`
			`available`
			`*/`
			`std::string operator[](unsigned int n) const throw(std::exception);`
initial version 2004-12-14 20:20:30 +00:00
			`private:`

			`regex_t _regex;`
possibility to evaluate and extract sub expressions 2004-12-16 13:09:31 +00:00			`bool _hassub;`
			`regmatch_t _sub[MAX_SUB];`
			`std::string _text;`
initial version 2004-12-14 20:20:30 +00:00
			`};`
			`}`