| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | /** @file
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $Id$ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $Date$ | 
					
						
							|  |  |  |     $Author$ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @copy © Marc Wäckerlin | 
					
						
							|  |  |  |     @license LGPL, see file <a href="license.html">COPYING</a> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $Log$ | 
					
						
							| 
									
										
										
										
											2004-12-20 13:24:26 +00:00
										 |  |  |     Revision 1.5  2004/12/20 13:21:49  marc | 
					
						
							|  |  |  |     #ifndef forgotten
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-20 07:40:36 +00:00
										 |  |  |     Revision 1.4  2004/12/20 07:40:36  marc | 
					
						
							|  |  |  |     documentation improved, new grouping | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-17 16:27:58 +00:00
										 |  |  |     Revision 1.3  2004/12/17 16:27:58  marc | 
					
						
							|  |  |  |     error in documentation syntax | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |     Revision 1.2  2004/12/16 13:09:31  marc | 
					
						
							|  |  |  |     possibility to evaluate and extract sub expressions | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |     Revision 1.1  2004/12/14 20:20:30  marc | 
					
						
							|  |  |  |     initial version | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | */ | 
					
						
							| 
									
										
										
										
											2004-12-20 13:24:26 +00:00
										 |  |  | #ifndef __MRW__REGEXP_HPP__
 | 
					
						
							|  |  |  | #define __MRW__REGEXP_HPP__
 | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include <regex.h>
 | 
					
						
							|  |  |  | #include <string>
 | 
					
						
							|  |  |  | #include <stdexcept>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace mrw { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /** @defgroup regexp Regular Expressions
 | 
					
						
							| 
									
										
										
										
											2004-12-20 07:40:36 +00:00
										 |  |  |    */ | 
					
						
							|  |  |  |   //@{
 | 
					
						
							|  |  |  |   /** @defgroup regexpregexp Regular Expressions
 | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |       A simple wrapper around the C POSIX regular expression library | 
					
						
							|  |  |  |       with a C++ Interface. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       Usage sample: | 
					
						
							|  |  |  |       @code | 
					
						
							|  |  |  |         std::ifstream file(filename);           // read from a file
 | 
					
						
							|  |  |  |         mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"
 | 
					
						
							|  |  |  |         for (std::string line; | 
					
						
							|  |  |  |              mrw::getline(file, line);)         // read line by line
 | 
					
						
							|  |  |  |           if (findBrackets(line))               // check for regular expression
 | 
					
						
							|  |  |  |             found(line);                        // found, do something
 | 
					
						
							|  |  |  |       @endcode | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   */ | 
					
						
							| 
									
										
										
										
											2004-12-17 16:27:58 +00:00
										 |  |  |   //@{
 | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   /** @brief a regular expression
 | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       This class manages a simple regular expression. | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       Usage sample: | 
					
						
							|  |  |  |       @code | 
					
						
							|  |  |  |         std::ifstream file(filename);           // read from a file
 | 
					
						
							|  |  |  |         mrw::RegExp findBrackets("^\\[.*\\]$"); // look for "[some text]"
 | 
					
						
							|  |  |  |         for (std::string line; | 
					
						
							|  |  |  |              mrw::getline(file, line);)         // read line by line
 | 
					
						
							|  |  |  |           if (findBrackets(line))               // check for regular expression
 | 
					
						
							|  |  |  |             found(line);                        // found, do something
 | 
					
						
							|  |  |  |       @endcode | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   */ | 
					
						
							|  |  |  |   class RegExp { | 
					
						
							|  |  |  |    | 
					
						
							|  |  |  |   public: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |     /// The maximum number of sub expressions that are evaluated.
 | 
					
						
							|  |  |  |     static const unsigned int MAX_SUB = 99; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |     /** @brief flags that influence regular expressions
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Flag @c newline treats a newline in the text to be compared as | 
					
						
							|  |  |  |         dividing the text into multiple lines, so that @c $ can match | 
					
						
							|  |  |  |         before the newline and @c ^ can match after.  Also, don't | 
					
						
							|  |  |  |         permit @c . to match a newline, and don't permit @c [^...] to | 
					
						
							|  |  |  |         match a newline. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Otherwise, newline acts like any other ordinary character. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Flag @c nosub is used internally, don't specify it. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     */ | 
					
						
							|  |  |  |     enum Flags { | 
					
						
							|  |  |  |       extended = REG_EXTENDED,  ///< use extended regular expressions
 | 
					
						
							|  |  |  |       icase    = REG_ICASE,     ///< ignore case in match
 | 
					
						
							|  |  |  |       nosub    = REG_NOSUB,     ///< @b internal (report only success/fail)
 | 
					
						
							|  |  |  |       newline  = REG_NEWLINE    ///< treat lines individually
 | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** @brief define a new regular expression
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         The regular expression is compiled on instanciation and can | 
					
						
							|  |  |  |         then be matced several times on different texts. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |         @param pattern the regular expression pattern, see the @c man | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |                        page for POSIX regular expressions (on linux: @c | 
					
						
							|  |  |  |                        info 7 regex) | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |         @param hassub  pass @c true if you want to evaluate sub expressions | 
					
						
							|  |  |  |         @param flags   special flags, they default to extended and | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |                        should consist of the Flag values combined with @c | | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |                        flag @c nosub must not be used, because it is set | 
					
						
							|  |  |  |                        automatically if necessary | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |         @throw mrw::invalid_argument if pattern compilation fails or @c nosub | 
					
						
							|  |  |  |                                      was set in @c flags | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |     */ | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |     RegExp(const std::string& pattern, bool hassub=false, int flags=extended) | 
					
						
							|  |  |  |       throw(std::exception); | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /** @brief cleans up expression from memory */ | 
					
						
							|  |  |  |     ~RegExp() throw(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** @brief apply the expression, match a text against the expression
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         @param text a text that is matched against the regular expression | 
					
						
							|  |  |  |         @return | 
					
						
							|  |  |  |           - true if @c text matches | 
					
						
							|  |  |  |           - false otherwise */ | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |     bool operator()(const std::string& text) throw(std::bad_exception); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** @brief get the n-th sub expression of the last matched text
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         If the RegExp was instanciated with @c Regexp(pattern, true), | 
					
						
							|  |  |  |         so that sub expressions are evaluated, then you can get the | 
					
						
							|  |  |  |         n-th matched sub expression. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         @param n the number of sub expression to get, get the n-th sub | 
					
						
							|  |  |  |                  expression | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         @throw mrw::invalid_argument if this subexpression is not | 
					
						
							|  |  |  |                                      available | 
					
						
							|  |  |  |     */ | 
					
						
							|  |  |  |     std::string operator[](unsigned int n) const throw(std::exception); | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |    | 
					
						
							|  |  |  |   private: | 
					
						
							|  |  |  |    | 
					
						
							|  |  |  |     regex_t _regex; | 
					
						
							| 
									
										
										
										
											2004-12-16 13:09:31 +00:00
										 |  |  |     bool _hassub; | 
					
						
							|  |  |  |     regmatch_t _sub[MAX_SUB]; | 
					
						
							|  |  |  |     std::string _text; | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  |    | 
					
						
							|  |  |  |   }; | 
					
						
							| 
									
										
										
										
											2004-12-17 16:27:58 +00:00
										 |  |  |   //@}
 | 
					
						
							| 
									
										
										
										
											2004-12-20 07:40:36 +00:00
										 |  |  |   //@}
 | 
					
						
							| 
									
										
										
										
											2004-12-14 20:20:30 +00:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2004-12-20 13:24:26 +00:00
										 |  |  | #endif
 |