Pattern.h

00001 #ifndef __PATTERN_H__
00002 #define __PATTERN_H__
00003 
00004 #include <vector>
00005 #include <string>
00006 #include <map>
00007 
00008 class Matcher;
00009 class NFANode;
00010 class NFAQuantifierNode;
00011 
00028 class Pattern
00029 {
00030   friend class Matcher;
00031   friend class NFANode;
00032   friend class NFAQuantifierNode;
00033   private:
00041     Pattern(const std::string & rhs);
00042   protected:
00047     static std::map<std::string, Pattern *> compiledPatterns;
00053     static std::map<std::string, std::pair<std::string, unsigned long> > registeredPatterns;
00054   protected:
00059     std::map<NFANode*, bool> nodes;
00065     Matcher * matcher;
00069     NFANode * head;
00073     std::string pattern;
00078     bool error;
00084     int curInd;
00088     int groupCount;
00092     int nonCapGroupCount;
00096     unsigned long flags;
00097   protected:
00102     void raiseError();
00108     NFANode * registerNode(NFANode * node);
00109 
00119     std::string classUnion      (std::string s1, std::string s2)  const;
00129     std::string classIntersect  (std::string s1, std::string s2)  const;
00139     std::string classNegate     (std::string s1)                  const;
00149     std::string classCreateRange(char low,       char hi)         const;
00150 
00159     int getInt(int start, int end);
00171     bool quantifyCurly(int & sNum, int & eNum);
00183     NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn);
00184 
00192     NFANode * quantify(NFANode * newNode);
00199     std::string parseClass();
00206     std::string parsePosix();
00211     std::string parseOctal();
00216     std::string parseHex();
00221     NFANode *   parseBackref();
00231     std::string parseEscape(bool & inv, bool & quo);
00240     NFANode * parseRegisteredPattern(NFANode ** end);
00248     NFANode * parseBehind(const bool pos, NFANode ** end);
00253     NFANode * parseQuote();
00262     NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL);
00263   public:
00265     const static unsigned long CASE_INSENSITIVE;
00267     const static unsigned long LITERAL;
00269     const static unsigned long DOT_MATCHES_ALL;
00273     const static unsigned long MULTILINE_MATCHING;
00277     const static unsigned long UNIX_LINE_MODE;
00279     const static int MIN_QMATCH;
00281     const static int MAX_QMATCH;
00282   public:
00295     static Pattern                    * compile        (const std::string & pattern,
00296                                                         const unsigned long mode = 0);
00310     static Pattern                    * compileAndKeep (const std::string & pattern,
00311                                                         const unsigned long mode = 0);
00312 
00332     static std::string                  replace        (const std::string & pattern,
00333                                                         const std::string & str,
00334                                                         const std::string & replaceWith,
00335                                                         const unsigned long mode = 0);
00336 
00360     static std::vector<std::string>     split          (const std::string & pattern,
00361                                                         const std::string & str,
00362                                                         const bool keepEmptys = 0,
00363                                                         const unsigned long limit = 0,
00364                                                         const unsigned long mode = 0);
00365 
00384     static std::vector<std::string>     findAll        (const std::string & pattern,
00385                                                         const std::string & str,
00386                                                         const unsigned long mode = 0);
00387 
00397     static bool                         matches        (const std::string & pattern,
00398                                                         const std::string & str,
00399                                                         const unsigned long mode = 0);
00400 
00420     static bool                         registerPattern(const std::string & name,
00421                                                         const std::string & pattern,
00422                                                         const unsigned long mode = 0);
00423 
00427     static void                         unregisterPatterns();
00431     static void                         clearPatternCache();
00432 
00454     static std::pair<std::string, int>  findNthMatch   (const std::string & pattern,
00455                                                         const std::string & str,
00456                                                         const int matchNum,
00457                                                         const unsigned long mode = 0);
00458   public:
00462     ~Pattern();
00463 
00464     std::string               replace        (const std::string & str,
00465                                               const std::string & replaceWith);
00466     std::vector<std::string>  split          (const std::string & str, const bool keepEmptys = 0,
00467                                               const unsigned long limit = 0);
00468     std::vector<std::string>  findAll        (const std::string & str);
00469     bool                      matches         (const std::string & str);
00474     unsigned long             getFlags       () const;
00479     std::string               getPattern     () const;
00486     Matcher                 * createMatcher  (const std::string & str);
00487 };
00488 
00489 class NFANode
00490 {
00491   friend class Matcher;
00492   public:
00493     NFANode * next;
00494     NFANode();
00495     virtual ~NFANode();
00496     virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00497     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const = 0;
00498 };
00499 class NFACharNode : public NFANode
00500 {
00501   protected:
00502     char ch;
00503   public:
00504     NFACharNode(const char c);
00505     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00506 };
00507 class NFACICharNode : public NFANode
00508 {
00509   protected:
00510     char ch;
00511   public:
00512     NFACICharNode(const char c);
00513     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00514 };
00515 class NFAStartNode : public NFANode
00516 {
00517   public:
00518     NFAStartNode();
00519     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00520 };
00521 class NFAEndNode : public NFANode
00522 {
00523   public:
00524     NFAEndNode();
00525     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00526 };
00527 class NFAQuantifierNode : public NFANode
00528 {
00529   public:
00530     int min, max;
00531     NFANode * inner;
00532     virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00533     NFAQuantifierNode(Pattern * pat, NFANode * internal,
00534                       const int minMatch = Pattern::MIN_QMATCH,
00535                       const int maxMatch = Pattern::MAX_QMATCH);
00536     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00537 };
00538 class NFAGreedyQuantifierNode : public NFAQuantifierNode
00539 {
00540   public:
00541     NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal,
00542                             const int minMatch = Pattern::MIN_QMATCH,
00543                             const int maxMatch = Pattern::MAX_QMATCH);
00544     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00545     virtual int matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar) const;
00546 };
00547 class NFALazyQuantifierNode : public NFAQuantifierNode
00548 {
00549   public:
00550     NFALazyQuantifierNode(Pattern * pat, NFANode * internal,
00551                           const int minMatch = Pattern::MIN_QMATCH,
00552                           const int maxMatch = Pattern::MAX_QMATCH);
00553     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00554 };
00555 class NFAPossessiveQuantifierNode : public NFAQuantifierNode
00556 {
00557   public:
00558     NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal,
00559                                 const int minMatch = Pattern::MIN_QMATCH,
00560                                 const int maxMatch = Pattern::MAX_QMATCH);
00561     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00562 };
00563 class NFAAcceptNode : public NFANode
00564 {
00565   public:
00566     NFAAcceptNode();
00567     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00568 };
00569 class NFAClassNode : public NFANode
00570 {
00571   public:
00572     bool inv;
00573     std::map<char, bool> vals;
00574     NFAClassNode(const bool invert = 0);
00575     NFAClassNode(const std::string & clazz, const bool invert);
00576     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00577 };
00578 class NFACIClassNode : public NFANode
00579 {
00580   public:
00581     bool inv;
00582     std::map<char, bool> vals;
00583     NFACIClassNode(const bool invert = 0);
00584     NFACIClassNode(const std::string & clazz, const bool invert);
00585     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00586 };
00587 class NFASubStartNode : public NFANode
00588 {
00589   public:
00590     NFASubStartNode();
00591     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00592 };
00593 class NFAOrNode : public NFANode
00594 {
00595   public:
00596     NFANode * one;
00597     NFANode * two;
00598     NFAOrNode(NFANode * first, NFANode * second);
00599     virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00600     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00601 };
00602 class NFAQuoteNode : public NFANode
00603 {
00604   public:
00605     std::string qStr;
00606     NFAQuoteNode(const std::string & quoted);
00607     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00608 };
00609 class NFACIQuoteNode : public NFANode
00610 {
00611   public:
00612     std::string qStr;
00613     NFACIQuoteNode(const std::string & quoted);
00614     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00615 };
00616 class NFALookAheadNode : public NFANode
00617 {
00618   public:
00619     bool pos;
00620     NFANode * inner;
00621     NFALookAheadNode(NFANode * internal, const bool positive);
00622     virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00623     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00624 };
00625 class NFALookBehindNode : public NFANode
00626 {
00627   public:
00628     bool pos;
00629     std::string mStr;
00630     NFALookBehindNode(const std::string & str, const bool positive);
00631     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00632 };
00633 class NFAStartOfLineNode : public NFANode
00634 {
00635   public:
00636     NFAStartOfLineNode();
00637     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00638 };
00639 class NFAEndOfLineNode : public NFANode
00640 {
00641   public:
00642     NFAEndOfLineNode();
00643     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00644 };
00645 class NFAReferenceNode : public NFANode
00646 {
00647   public:
00648     int gi;
00649     NFAReferenceNode(const int groupIndex);
00650     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00651 };
00652 class NFAStartOfInputNode : public NFANode
00653 {
00654   public:
00655     NFAStartOfInputNode();
00656     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00657 };
00658 class NFAEndOfInputNode : public NFANode
00659 {
00660   public:
00661     bool term;
00662     NFAEndOfInputNode(const bool lookForTerm);
00663     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00664 };
00665 class NFAWordBoundaryNode : public NFANode
00666 {
00667   public:
00668     bool pos;
00669     NFAWordBoundaryNode(const bool positive);
00670     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00671 };
00672 class NFAEndOfMatchNode : public NFANode
00673 {
00674   public:
00675     NFAEndOfMatchNode();
00676     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00677 };
00678 class NFAGroupHeadNode : public NFANode
00679 {
00680   public:
00681     int gi;
00682     NFAGroupHeadNode(const int groupIndex);
00683     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00684 };
00685 class NFAGroupTailNode : public NFANode
00686 {
00687   public:
00688     int gi;
00689     NFAGroupTailNode(const int groupIndex);
00690     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00691 };
00692 class NFAGroupLoopPrologueNode : public NFANode
00693 {
00694   public:
00695     int gi;
00696     NFAGroupLoopPrologueNode(const int groupIndex);
00697     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00698 };
00699 class NFAGroupLoopNode : public NFANode
00700 {
00701   public:
00702     int gi, min, max, type;
00703     NFANode * inner;
00704     NFAGroupLoopNode(NFANode * internal, const int minMatch,
00705                      const int maxMatch, const int groupIndex, const int matchType);
00706     virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00707     virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00708     int matchGreedy(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00709     int matchLazy(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00710     int matchPossessive(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00711 };
00712 
00713 #endif
00714 

Generated on Fri Apr 27 13:12:36 2007 for Highlight Code Converter by  doxygen 1.5.2