00001 #ifndef __PATTERN_H__ 00002 #define __PATTERN_H__ 00003 00004 #include <vector> 00005 #include <string> 00006 #include <map> 00007 00008 class Matcher; 00009 class NFANode; 00010 class NFAQuantifierNode; 00011 00028 class Pattern 00029 { 00030 friend class Matcher; 00031 friend class NFANode; 00032 friend class NFAQuantifierNode; 00033 private: 00041 Pattern(const std::string & rhs); 00042 protected: 00047 static std::map<std::string, Pattern *> compiledPatterns; 00053 static std::map<std::string, std::pair<std::string, unsigned long> > registeredPatterns; 00054 protected: 00059 std::map<NFANode*, bool> nodes; 00065 Matcher * matcher; 00069 NFANode * head; 00073 std::string pattern; 00078 bool error; 00084 int curInd; 00088 int groupCount; 00092 int nonCapGroupCount; 00096 unsigned long flags; 00097 protected: 00102 void raiseError(); 00108 NFANode * registerNode(NFANode * node); 00109 00119 std::string classUnion (std::string s1, std::string s2) const; 00129 std::string classIntersect (std::string s1, std::string s2) const; 00139 std::string classNegate (std::string s1) const; 00149 std::string classCreateRange(char low, char hi) const; 00150 00159 int getInt(int start, int end); 00171 bool quantifyCurly(int & sNum, int & eNum); 00183 NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn); 00184 00192 NFANode * quantify(NFANode * newNode); 00199 std::string parseClass(); 00206 std::string parsePosix(); 00211 std::string parseOctal(); 00216 std::string parseHex(); 00221 NFANode * parseBackref(); 00231 std::string parseEscape(bool & inv, bool & quo); 00240 NFANode * parseRegisteredPattern(NFANode ** end); 00248 NFANode * parseBehind(const bool pos, NFANode ** end); 00253 NFANode * parseQuote(); 00262 NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL); 00263 public: 00265 const static unsigned long CASE_INSENSITIVE; 00267 const static unsigned long LITERAL; 00269 const static unsigned long DOT_MATCHES_ALL; 00273 const static unsigned long MULTILINE_MATCHING; 00277 const static unsigned long UNIX_LINE_MODE; 00279 const static int MIN_QMATCH; 00281 const static int MAX_QMATCH; 00282 public: 00295 static Pattern * compile (const std::string & pattern, 00296 const unsigned long mode = 0); 00310 static Pattern * compileAndKeep (const std::string & pattern, 00311 const unsigned long mode = 0); 00312 00332 static std::string replace (const std::string & pattern, 00333 const std::string & str, 00334 const std::string & replaceWith, 00335 const unsigned long mode = 0); 00336 00360 static std::vector<std::string> split (const std::string & pattern, 00361 const std::string & str, 00362 const bool keepEmptys = 0, 00363 const unsigned long limit = 0, 00364 const unsigned long mode = 0); 00365 00384 static std::vector<std::string> findAll (const std::string & pattern, 00385 const std::string & str, 00386 const unsigned long mode = 0); 00387 00397 static bool matches (const std::string & pattern, 00398 const std::string & str, 00399 const unsigned long mode = 0); 00400 00420 static bool registerPattern(const std::string & name, 00421 const std::string & pattern, 00422 const unsigned long mode = 0); 00423 00427 static void unregisterPatterns(); 00431 static void clearPatternCache(); 00432 00454 static std::pair<std::string, int> findNthMatch (const std::string & pattern, 00455 const std::string & str, 00456 const int matchNum, 00457 const unsigned long mode = 0); 00458 public: 00462 ~Pattern(); 00463 00464 std::string replace (const std::string & str, 00465 const std::string & replaceWith); 00466 std::vector<std::string> split (const std::string & str, const bool keepEmptys = 0, 00467 const unsigned long limit = 0); 00468 std::vector<std::string> findAll (const std::string & str); 00469 bool matches (const std::string & str); 00474 unsigned long getFlags () const; 00479 std::string getPattern () const; 00486 Matcher * createMatcher (const std::string & str); 00487 }; 00488 00489 class NFANode 00490 { 00491 friend class Matcher; 00492 public: 00493 NFANode * next; 00494 NFANode(); 00495 virtual ~NFANode(); 00496 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 00497 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const = 0; 00498 }; 00499 class NFACharNode : public NFANode 00500 { 00501 protected: 00502 char ch; 00503 public: 00504 NFACharNode(const char c); 00505 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00506 }; 00507 class NFACICharNode : public NFANode 00508 { 00509 protected: 00510 char ch; 00511 public: 00512 NFACICharNode(const char c); 00513 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00514 }; 00515 class NFAStartNode : public NFANode 00516 { 00517 public: 00518 NFAStartNode(); 00519 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00520 }; 00521 class NFAEndNode : public NFANode 00522 { 00523 public: 00524 NFAEndNode(); 00525 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00526 }; 00527 class NFAQuantifierNode : public NFANode 00528 { 00529 public: 00530 int min, max; 00531 NFANode * inner; 00532 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 00533 NFAQuantifierNode(Pattern * pat, NFANode * internal, 00534 const int minMatch = Pattern::MIN_QMATCH, 00535 const int maxMatch = Pattern::MAX_QMATCH); 00536 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00537 }; 00538 class NFAGreedyQuantifierNode : public NFAQuantifierNode 00539 { 00540 public: 00541 NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, 00542 const int minMatch = Pattern::MIN_QMATCH, 00543 const int maxMatch = Pattern::MAX_QMATCH); 00544 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00545 virtual int matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar) const; 00546 }; 00547 class NFALazyQuantifierNode : public NFAQuantifierNode 00548 { 00549 public: 00550 NFALazyQuantifierNode(Pattern * pat, NFANode * internal, 00551 const int minMatch = Pattern::MIN_QMATCH, 00552 const int maxMatch = Pattern::MAX_QMATCH); 00553 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00554 }; 00555 class NFAPossessiveQuantifierNode : public NFAQuantifierNode 00556 { 00557 public: 00558 NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal, 00559 const int minMatch = Pattern::MIN_QMATCH, 00560 const int maxMatch = Pattern::MAX_QMATCH); 00561 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00562 }; 00563 class NFAAcceptNode : public NFANode 00564 { 00565 public: 00566 NFAAcceptNode(); 00567 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00568 }; 00569 class NFAClassNode : public NFANode 00570 { 00571 public: 00572 bool inv; 00573 std::map<char, bool> vals; 00574 NFAClassNode(const bool invert = 0); 00575 NFAClassNode(const std::string & clazz, const bool invert); 00576 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00577 }; 00578 class NFACIClassNode : public NFANode 00579 { 00580 public: 00581 bool inv; 00582 std::map<char, bool> vals; 00583 NFACIClassNode(const bool invert = 0); 00584 NFACIClassNode(const std::string & clazz, const bool invert); 00585 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00586 }; 00587 class NFASubStartNode : public NFANode 00588 { 00589 public: 00590 NFASubStartNode(); 00591 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00592 }; 00593 class NFAOrNode : public NFANode 00594 { 00595 public: 00596 NFANode * one; 00597 NFANode * two; 00598 NFAOrNode(NFANode * first, NFANode * second); 00599 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 00600 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00601 }; 00602 class NFAQuoteNode : public NFANode 00603 { 00604 public: 00605 std::string qStr; 00606 NFAQuoteNode(const std::string & quoted); 00607 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00608 }; 00609 class NFACIQuoteNode : public NFANode 00610 { 00611 public: 00612 std::string qStr; 00613 NFACIQuoteNode(const std::string & quoted); 00614 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00615 }; 00616 class NFALookAheadNode : public NFANode 00617 { 00618 public: 00619 bool pos; 00620 NFANode * inner; 00621 NFALookAheadNode(NFANode * internal, const bool positive); 00622 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 00623 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00624 }; 00625 class NFALookBehindNode : public NFANode 00626 { 00627 public: 00628 bool pos; 00629 std::string mStr; 00630 NFALookBehindNode(const std::string & str, const bool positive); 00631 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00632 }; 00633 class NFAStartOfLineNode : public NFANode 00634 { 00635 public: 00636 NFAStartOfLineNode(); 00637 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00638 }; 00639 class NFAEndOfLineNode : public NFANode 00640 { 00641 public: 00642 NFAEndOfLineNode(); 00643 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00644 }; 00645 class NFAReferenceNode : public NFANode 00646 { 00647 public: 00648 int gi; 00649 NFAReferenceNode(const int groupIndex); 00650 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00651 }; 00652 class NFAStartOfInputNode : public NFANode 00653 { 00654 public: 00655 NFAStartOfInputNode(); 00656 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00657 }; 00658 class NFAEndOfInputNode : public NFANode 00659 { 00660 public: 00661 bool term; 00662 NFAEndOfInputNode(const bool lookForTerm); 00663 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00664 }; 00665 class NFAWordBoundaryNode : public NFANode 00666 { 00667 public: 00668 bool pos; 00669 NFAWordBoundaryNode(const bool positive); 00670 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00671 }; 00672 class NFAEndOfMatchNode : public NFANode 00673 { 00674 public: 00675 NFAEndOfMatchNode(); 00676 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00677 }; 00678 class NFAGroupHeadNode : public NFANode 00679 { 00680 public: 00681 int gi; 00682 NFAGroupHeadNode(const int groupIndex); 00683 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00684 }; 00685 class NFAGroupTailNode : public NFANode 00686 { 00687 public: 00688 int gi; 00689 NFAGroupTailNode(const int groupIndex); 00690 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00691 }; 00692 class NFAGroupLoopPrologueNode : public NFANode 00693 { 00694 public: 00695 int gi; 00696 NFAGroupLoopPrologueNode(const int groupIndex); 00697 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00698 }; 00699 class NFAGroupLoopNode : public NFANode 00700 { 00701 public: 00702 int gi, min, max, type; 00703 NFANode * inner; 00704 NFAGroupLoopNode(NFANode * internal, const int minMatch, 00705 const int maxMatch, const int groupIndex, const int matchType); 00706 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 00707 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00708 int matchGreedy(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00709 int matchLazy(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00710 int matchPossessive(const std::string & str, Matcher * matcher, const int curInd = 0) const; 00711 }; 00712 00713 #endif 00714