nfa_epsilon.h

00001 /*
00002  * ASTL - the Automaton Standard Template Library.
00003  * C++ generic components for Finite State Machine handling.
00004  * Copyright (C) 2000 Vincent Le Maout (vlemaout@lexiquest.fr).
00005  * 
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  * 
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  * 
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 
00023 //
00024 //      File:           nfa_epsilon.hh
00025 //      Copyright:      Vincent LE MAOUT
00026 //      Date:           Thu Jul 30 14:15:30 MET DST 1998
00027 
00028 
00029 #ifndef ASTL_NFA_Epsilon
00030 #define ASTL_NFA_Epsilon
00031 
00032 #include <set>
00033 #include <stack>
00034 #include <vector>
00035 #include <functional>
00036 #include <iterator>
00037 #include <algorithm>
00038 #include <map>
00039 #include <state_ref.h>
00040 #include <dfa_default.h>
00041 #include <tag.h>
00042 
00043 #ifdef WIN32
00044 using namespace std;
00045 #endif
00046 
00047 template <class Alphabet, class Tag>
00048 struct t_state {
00049   Tag _leTag;
00050     
00051   // si _lalettre==Alphabet() on a 0, 1 ou 2 deux epsilon transitions (aim1 et/ou aim2) 
00052   // sinon on a une transition (aim1) et peut-etre une epsilon trans (aim2)
00053   // la transition par defaut est dans default_t
00054   Alphabet _laLettre;
00055   t_state<Alphabet, Tag> *_aim1, *_aim2, *default_t;
00056 
00057   t_state() 
00058     : _leTag(Tag()), _laLettre(0), _aim1(0),_aim2(0), default_t(0)
00059   { }
00060 };
00061 
00062 
00063 template <class edges_iterator, class State>
00064 edges_iterator& operator_plus_plus (edges_iterator &t, const State &)
00065 {
00066   // Precondition:all states in q have at least 1 transition and/or a default trans                             
00067   if (t.i == t.e->letters.end())    // at edges end ?
00068   {
00069     t.e = NULL;
00070     return (t);
00071   }
00072   t.result = State();
00073   t.result = t.e->nfa->delta1(t.e->q, *(t.i));
00074   ++t.i;
00075   return (t);
00076 }
00077 
00078 template <class _Sigma,class _Tag> 
00079 class NFA_Epsilon
00080 {
00081 public:
00082   typedef _Sigma                    Sigma;
00083   typedef _Tag                      Tag;
00084   typedef typename _Sigma::Alphabet Alphabet;           
00085 private:
00086   typedef NFA_Epsilon<Sigma, Tag> self;
00087   typedef t_state<Alphabet, Tag> _state;
00088   typedef _state * p_state;
00089   
00090 public:
00091   struct compare_state : binary_function<p_state, p_state, bool> 
00092   {
00093     bool operator ()(const p_state & x, const p_state & y) const 
00094     {
00095       if (x->_leTag == y->_leTag)
00096         return (x < y);
00097       else
00098         return (!(x->_leTag < y->_leTag)) ;
00099     }
00100   };
00101 
00103   typedef set<p_state, compare_state> State;
00104         
00105 private:
00106   mutable vector<p_state> closure;
00107   vector<State> Q;
00108   State         I;
00109   Alphabet      lettre_nulle;
00110   Tag           tag_nul;
00111   unsigned long _trans_count, _state_count;
00112 
00113 public: 
00114   class Edges
00115   {
00116     friend class NFA_Epsilon<_Sigma, _Tag>;  
00117   public:
00118     State q;
00119     const NFA_Epsilon<_Sigma, _Tag> *nfa;
00120     vector<Alphabet> letters;
00121 
00122   public:
00123     // typedefs:
00124 
00125     typedef Alphabet                     key_type;
00126     typedef pair<Alphabet, State>  value_type;
00127     typedef less<Alphabet>               key_compare;
00128     typedef value_type*                  pointer;
00129     typedef const value_type   &         const_reference;
00130     typedef unsigned int                 size_type; 
00131     typedef int                          difference_type;
00132     //    typedef    value_compare;
00133 
00134 #ifdef WIN32    
00135     class const_iterator 
00136       : public iterator<forward_iterator_tag, value_type, difference_type>
00137 #else
00138     class const_iterator 
00139       : public forward_iterator<value_type, difference_type>
00140 #endif
00141 
00142     {
00143       friend class Edges;
00144     public:
00145       const Edges *e;
00146       vector<Alphabet>::const_iterator i;
00147       State result;
00148 
00149       const_iterator(const Edges *_e) : e(_e), i(_e->letters.begin())
00150       { 
00151         ++(*this);
00152       }
00153 
00154     public:
00155       const_iterator() : e(NULL)
00156       { }
00157 
00158       const_iterator(const const_iterator &x)
00159         : e(x.e), i(x.i)
00160       { }
00161 
00162       bool operator == (const const_iterator& x) const
00163       {
00164         // WARNING:
00165         return (x.e == e);
00166       }
00167 
00168       value_type operator * () const {
00169         return (make_pair(i[-1], result));
00170       }
00171 
00172       const_iterator& operator ++ () {
00173         // Precondition:all states in q have at least 1 transition and/or a default trans                               
00174         if (i == e->letters.end())    // at edges end ?
00175         {
00176           e = NULL;
00177           return (*this);
00178         }
00179         result = State();
00180         result = e->nfa->delta1(e->q, *i);
00181         ++i;
00182         return (*this);
00183   //    return (operator_plus_plus(*this, State()));
00184       }
00185 
00186       const_iterator operator ++ (int) 
00187       { 
00188         const_iterator tmp = *this;
00189         ++(*this);
00190         return (tmp);
00191       }
00192     };
00193     
00194     // allocation/deallocation:
00195 
00196     
00197     Edges() { }
00198     
00199     Edges(const Edges &e) : q(e.q), nfa(e.nfa), letters(e.letters)
00200     { }
00201 
00202   private:
00203     Edges(const State & s, const NFA_Epsilon<_Sigma, _Tag> *_nfa, vector<Alphabet> &l) 
00204       : q(s), nfa(_nfa), letters(l)
00205     { }
00206 
00207   public:
00208     ~Edges() { }
00209     
00210     // accessors:
00211         
00212     //    key_compare key_comp() const { return (edg.key_comp()); } 
00213     //    value_compare value_comp() const { return (edg.value_comp()); }
00214     const_iterator begin() const {      
00215       return (const_iterator(this)); 
00216       // Postcondition:no states without transitions in q
00217     }
00218     
00219     const_iterator end() const { 
00220       const_iterator i;
00221       return (i); 
00222     }
00223     
00224     //    bool empty() const { 
00225     //     return (q.empty()); 
00226     //   }
00227 
00228     //   size_type size() const {
00229     //    return (q.size());
00230       /*
00231       size_type how_many = q.size();
00232       State::const_iterator first, last = q.end();
00233       for(first = q.begin(); first != last; ++first)
00234         if ((*first)->_aim2)
00235           ++how_many;
00236       return (how_many); 
00237       */
00238     //   }
00239  
00240     //          size_type max_size() const { return (edg.max_size()); }
00241 
00242     // multimap operations:
00243 
00244     //        const_iterator find(const key_type& x) const { return (const_iterator(edg.find(x))); }
00245     //      size_type count(const key_type& x) const { return (edg.count(x)); }
00246     //      const_iterator lower_bound(const key_type& x) const { return (const_iterator(edg.lower_bound(x))); }
00247     //      const_iterator upper_bound(const key_type& x) const { return (const_iterator(edg.upper_bound(x))); }
00248     //      pair<const_iterator, const_iterator> equal_range(const key_type& x) const
00249     //      {
00250     //      pair<typename NFA::Edges::const_iterator, typename NFA::Edges::const_iterator> p = edg.equal_range(x);
00251     //      return (make_pair(const_iterator(p.first), const_iterator(p.second)));
00252     //      }
00253 
00254     // comparison:
00255         
00256     friend bool operator == (const Edges& x, const Edges& y) {
00257       return (x.q == y.q);
00258     }
00259   };
00260     
00261    
00262 
00263   // Back to NFA_Epsilon class
00264 
00265   State null_state;
00266 
00267   NFA_Epsilon(unsigned long n = 0) 
00268     : Q(), I(), lettre_nulle(0),
00269       tag_nul(Tag()), _trans_count(0), _state_count(0), null_state() 
00270   { 
00271     closure.reserve(2048);
00272   }
00273 
00274   typedef vector<State>::const_iterator const_iterator;
00275 
00276   State new_state()
00277   {    
00278     _state *q = new _state; 
00279     State r;
00280     r.insert(q);
00281     Q.push_back(r);
00282     ++_state_count;
00283     //    cout << "NFA_epsilon::new_state() = " << q << endl;
00284     return (r);
00285   }
00286 
00287   template <class OutputIterator>
00288   OutputIterator new_state(unsigned long how_many, OutputIterator x)
00289   {
00290     for(; how_many > 0; --how_many)
00291       *x++ = new_state();
00292     return x;
00293   }
00294 
00295   /*
00296     void set_trans(State &from, const Alphabet &a, State &to)
00297     {
00298     //ne fait pas grand chose 
00299     }
00300     */
00301   private :
00302   void set_trans(_state *from, const Alphabet &a, _state *to)
00303   {
00304     from->_aim1 = to;
00305     from->_laLettre = a;
00306     ++_trans_count;
00307     //    cout << "set_trans(" << from << ", '" << a << "', " << to << ")" << endl;
00308   }
00309 
00310   void set_trans(_state *from, _state *to)
00311   {
00312     from->_aim2 = to;   
00313     I = epsilon_closure(I);
00314     ++_trans_count;
00315     //    cout << "set_epsilon_trans(" << from << ", " << to << ")" << endl;
00316   }
00317   void set_trans(_state *from, _state *to1, _state *to2)
00318   {
00319     from->_laLettre = Alphabet(0);
00320     from->_aim1 = to1; 
00321     from->_aim2 = to2; 
00322     I = epsilon_closure(I);
00323     _trans_count += 2;
00324     //   cout << "set_2_epsilon_trans(" << from << ", " << to1 << ", " << to2 << ")" << endl;
00325   }
00326 
00327   void set_default_trans(_state *from, _state *to)
00328   {
00329     from->default_t = to;
00330     ++_trans_count;
00331     //    cout << "set_default_trans(" << from << ", " << to << ")" << endl;
00332   }
00333 
00334 public:
00335   // attention : a finir
00336   void set_trans(const State & from, const Alphabet &a, const State & to)
00337   {
00338     set_trans(*(from.begin()), a, *(to.begin()));
00339   }
00340   void set_trans(const State & from, const State & to)
00341   {
00342     set_trans(*(from.begin()), *(to.begin()));
00343   }
00344   void set_trans(const State & from, const State & to1, const State & to2)
00345   {
00346     set_trans(*(from.begin()), *(to1.begin()), *(to2.begin()));
00347   }
00348   void set_default_trans(const State & from, const State & to)
00349   {
00350     set_default_trans(*(from.begin()), *(to.begin()));
00351   }
00352 
00354 
00355 
00356   public :
00357 
00358   template <class InputIterator>  
00359   void del_trans(const State & s, const Alphabet &a)
00360   {
00361     //
00362   }
00363 
00364 
00365   void del_trans(const State & s, const Alphabet &l, const State & aim)
00366   {
00367     //
00368   }
00369 
00370   void change_trans(const State & s, const Alphabet &l, const State & former_aim, const State & new_aim)
00371   {
00372     //
00373   }
00374 
00375   void copy_state(const State & from, const State & to) {
00376     **(to.begin()) = **(from.begin());
00377   }
00378 
00379   void del_state(const State &s) {
00380     --_state_count;
00381   }
00382 
00383   State duplicate_state(const State & s) {
00384     return State(s);
00385   }
00386 
00387   bool final(const State & s) const {
00389     return (this->tag(s) > tag_nul);
00390   }
00391 
00392 
00393   Tag& tag(State & s) {
00394     return (*(s.begin()))->_leTag ;
00395   }
00396 
00397   const Tag& tag(const State & s) const {
00398     return (*(s.begin()))->_leTag ;
00399   }
00400 
00401   Tag& tag(p_state p) {
00402     return (p->_leTag);
00403   }
00404 
00405   const Tag& tag(p_state p) const {
00406     return (p->_leTag);
00407   }
00408 
00409   State epsilon_closure(const State & s) const
00410   {
00411     State r;
00412     _state *t;
00413 
00414     State::const_iterator itb = s.begin();
00415     State::const_iterator ite = s.end() ; 
00416 
00417     for(; itb != ite ; ++itb)  
00418       closure.push_back(*itb);
00419 
00420 //    cout << "epsilon cloture: tags = ";
00421     for(; ! closure.empty();)
00422     {
00423       t = closure.back();
00424       closure.pop_back();
00425       if (t->_aim2)    // epsilon trans ?
00426         closure.push_back(t->_aim2);
00427 //      cout << t->_leTag << " ";
00428       if (t->_leTag != tag_nul || t->_laLettre != lettre_nulle || t->default_t)
00429         r.insert(t);
00430       if (t->_laLettre == lettre_nulle && t->_aim1)
00431         closure.push_back(t->_aim1);
00432     }
00433 
00434 //    cout << endl;
00435     return r;
00436   }
00437       
00438   /*
00439   State epsilon_closure(const State & s) const
00440   {
00441     State r;
00442     _state *t;
00443 
00444     State::const_iterator itb = s.begin();
00445     State::const_iterator ite = s.end() ; 
00446 
00447     for(; itb != ite ; ++itb)  
00448       closure.push_back(*itb);
00449 
00450     for(; ! closure.empty();)
00451     {
00452       t = closure.back();
00453       closure.pop_back();
00454       if (t->_laLettre == lettre_nulle) 
00455       {
00456         if (t->_aim2)  // des epsilon transitions ?
00457         {
00458           closure.push_back(t-> _aim2);
00459           if (t->_aim1)
00460             closure.push_back(t-> _aim1);
00461 
00462           if (t->default_t != NULL)
00463             r.insert(t);
00464         }
00465         else  // lettre nulle + pas d epsilon trans =>
00466           r.insert(t);
00467       }
00468       else
00469       {
00470         r.insert(t);
00471         if (t->_aim2)
00472           closure.push_back(t-> _aim2);
00473       }
00474     }
00475     return r;
00476   }
00477 */
00478 
00479   State delta1_default(const State & s) const
00480   { 
00481     
00482     State r;
00483 
00484     State::const_iterator itb = s.begin();
00485     State::const_iterator ite = s.end() ; 
00486 
00487     for(; itb != ite ; ++itb)  
00488       if ((*itb)->default_t != NULL)         // default trans
00489               r.insert((*itb)->default_t);
00490 
00491     return (epsilon_closure(r));
00492   }
00493     /*
00494     for(; ! closure.empty();)
00495     {
00496       t = closure.back();
00497       closure.pop_back();
00499 
00500       if (t->_laLettre == lettre_nulle) 
00501       {
00502         if (t->_aim2)
00503         {
00504           closure.push_back(t-> _aim2);
00505           if (t->_aim1)
00506             closure.push_back(t-> _aim1);
00507           if (t->default_t)
00508             r.insert(t);
00509         }
00510         else
00511           r.insert(t);
00512       }
00513       else
00514       {
00515         if (t->_aim2)
00516           closure.push_back(t-> _aim2);
00517         r.insert(t);
00518       }
00519     }
00520     return  r;
00521   }
00522 */
00523   State delta1(const State & s, const Alphabet &l) const
00524   { 
00525     
00526     State r;
00527 
00528     State::const_iterator itb = s.begin();
00529     State::const_iterator ite = s.end() ; 
00530 
00531     for(; itb != ite ; ++itb)  
00532       if ((*itb)->_laLettre == l )
00533           r.insert((*itb)->_aim1);
00538                 if (r.empty())
00539                         return (delta1_default(s));
00540 
00541     return (epsilon_closure(r));
00542   }
00543     /*
00544     for(; ! closure.empty();)
00545     {
00546       t = closure.back();
00547       closure.pop_back();
00549 
00550       if (t->_laLettre == lettre_nulle) 
00551       {
00552         if (t->_aim2)
00553         {
00554           closure.push_back(t-> _aim2);
00555           if (t->_aim1)
00556             closure.push_back(t-> _aim1);
00557           if (t->default_t)
00558             r.insert(t);
00559         }
00560         else
00561           r.insert(t);
00562       }
00563       else
00564       {
00565         r.insert(t);
00566         if (t->_aim2)
00567           closure.push_back(t-> _aim2);
00568       }
00569     }
00570     return  r;
00571   }
00572 */
00573   Edges delta2(const State & s) const
00574   {
00575     State q;
00576     vector<Alphabet> v;
00577     State::const_iterator first, last = s.end();
00578     for(first = s.begin(); first != last; ++first)
00579       if ((*first)->_laLettre != lettre_nulle)
00580       {
00581 //                              cout << "lettre nulle == " << lettre_nulle << " et la lettre = " << (*first)->_laLettre << endl;
00582 //              cout << "delta2:test de l'etat " << *first << "   1er but = " << (*first)->_aim1 << endl;
00583         q.insert(*first);
00584         v.push_back((*first)->_laLettre);
00585       }
00589    
00590     return Edges(q, this, v);
00591   }
00592       
00593   State initial() const {
00594     return (I);
00595   }
00596 
00597   void initial(const State & i) {
00598     I = epsilon_closure(i);
00599   }
00600 
00601   unsigned long state_count() const {
00602     return (_state_count);
00603   }
00604 
00605   unsigned long trans_count() const {
00606     return (_trans_count);
00607   }
00608 
00609   const_iterator begin() const {
00610     return (Q.begin());
00611   }
00612 
00613   const_iterator end() const {
00614     return (Q.end());
00615   }
00616 };
00617 
00618 template <class NFA, class InputIterator>
00619 class t_iterator : public iterator<input_iterator_tag, NFA::Tag>
00620 {
00621   InputIterator where;
00622   const NFA &nfa;
00623 
00624 public:
00625   t_iterator(InputIterator i, const NFA &_nfa) : where(i), nfa(_nfa)
00626   { }
00627 
00628   t_iterator(const t_iterator &x) : where(x.where), nfa(x.nfa)
00629   { }
00630 
00631   const NFA::Tag& operator * () const {
00632     return (nfa.tag(*where));
00633   }
00634 
00635   t_iterator& operator ++ () {
00636     ++where;
00637     return (*this);
00638   }
00639 
00640   t_iterator operator ++ (int) {
00641     t_iterator tmp = *this;
00642     ++(*this);
00643     return (tmp);
00644   }
00645 
00646   bool operator == (const t_iterator &x) const {
00647     return (where == x.where);
00648   }
00649 
00650   bool operator != (const t_iterator &x) const {
00651     return (!(*this == x));
00652   }
00653 };
00654 
00655 template <class _DFA, class Sigma, class Tag>
00656 void nfa_epsilon_to_dfa(const NFA_Epsilon<Sigma, Tag> &nfa,
00657                         DFA_default<_DFA> &dfa)
00658 {
00659   typedef NFA_Epsilon<Sigma, Tag> NFA;
00660   typedef DFA_default<_DFA> DFA;
00661   NFA::State Q = nfa.initial();
00662   if (Q == nfa.null_state)
00663     return;
00664   DFA::State q = dfa.new_state(), p;
00665   dfa.initial(q);
00666 //      cout << "dterminisation de l'etat initial" << endl;
00667   tag_traits<NFA::Tag>::tag_determinize(t_iterator<NFA, NFA::State::const_iterator>(Q.begin(), nfa),
00668                                         t_iterator<NFA, NFA::State::const_iterator>(Q.end(), nfa),
00669                                         dfa.tag(q));         
00670   map<NFA::State, DFA::State> mapping;
00671   typedef map<NFA::State, DFA::State>::iterator it;
00672   it where, where_to;
00673   it path[10240];
00674   it *top = path;
00675 
00676   // cout << "Q (initial) size = " << Q.size() << endl;
00677   *top = mapping.insert(make_pair(Q, q)).first;
00678   //  path.push_back((mapping.insert(make_pair(Q, q))).first);
00679   //  cout << "titi" << endl;
00680   ++top;
00681   pair<NFA::Alphabet, NFA::State> trans;
00682   while (top != path)
00683   {
00684     //  cout << "top != path" << endl;
00685     --top;
00686     // cout << "Q size = " << (**top).first.size() << endl;
00687     where = *top;
00688     q = (*where).second;
00689 
00690     //  cout << "(*where).second = " << q << endl; 
00691     // default transitions:
00692 //              cout << "transitions par defaut" << endl;
00693     Q = nfa.delta1_default((*where).first);
00694     if (!(Q == nfa.null_state))
00695     {
00696       //      cout << "dans default Q size = " << Q.size() << endl;
00697       where_to = mapping.find(Q);
00698       if (where_to != mapping.end())
00699       {
00701         dfa.set_trans(q, dfa.default_letter, (*where_to).second);
00702       }
00703       else
00704       {
00705         //      cout << "default:creating state " << ( p = dfa.new_state()) << endl;
00706         p = dfa.new_state();
00707         tag_traits<NFA::Tag>::tag_determinize(t_iterator<NFA, NFA::State::const_iterator>(Q.begin(), nfa),
00708                                               t_iterator<NFA, NFA::State::const_iterator>(Q.end(), nfa),
00709                                               dfa.tag(p));         
00710         dfa.set_trans(q, dfa.default_letter, p);
00711         dfa.final(p) = nfa.final(Q);
00712         *top = mapping.insert(make_pair(Q, p)).first;
00713         ++top;
00714         //       path.push_back((mapping.insert(make_pair(Q, p))).first);
00715       }
00716     }
00717 
00718     // normal transition:
00719 //              cout << "transitions normales" << endl;
00720     NFA::Edges edges = nfa.delta2((*where).first);
00721     NFA::Edges::const_iterator first, last = edges.end();
00722     for(first = edges.begin(); first != last; ++first)
00723     {
00724 //                      cout << "lettre = " << (*first).first << endl;
00725       trans = *first;
00726       where_to = mapping.find(trans.second);
00727       if (where_to != mapping.end())
00728         dfa.set_trans(q, trans.first, (*where_to).second);
00729       else
00730       {
00731         p = dfa.new_state();
00732         tag_traits<NFA::Tag>::tag_determinize(t_iterator<NFA, NFA::State::const_iterator>(trans.second.begin(), nfa),
00733                                               t_iterator<NFA, NFA::State::const_iterator>(trans.second.end(), nfa),
00734                                               dfa.tag(p));         
00735         //      cout << "set_trans(" << q << ", " << trans.first << ", " << p << ")" << endl;
00736         dfa.set_trans(q, trans.first, p);
00737         dfa.final(p) = nfa.final(trans.second);
00738         *top = (mapping.insert(make_pair(trans.second, p))).first;
00739         ++top;
00740         //        path.push_back((mapping.insert(make_pair(trans.second, p))).first);
00741       }
00742     }
00743   }
00744 }
00745 
00746 
00747 #endif // ASTL_NFA_Epsilon
00748 
00749 
00750 
00751 
00752 
00753 
00754 
00755 
00756 
00757 
00758 
00759 
00760 
00761 

Generated on Sun Mar 8 02:41:34 2009 for ASTL by  doxygen 1.5.7.1