language.h

00001 /*
00002  * ASTL - the Automaton Standard Template Library.
00003  * C++ generic components for Finite State Automata handling.
00004  * Copyright (C) 2000-2003 Vincent Le Maout (vincent.lemaout@chello.fr).
00005  * 
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  * 
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  * 
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #ifndef ASTL_LANGUAGE_H
00023 #define ASTL_LANGUAGE_H
00024 
00025 #include <astl.h>
00026 #include <iterator>
00027 #include <algorithm>
00028 #include <vector>
00029 #include <string>
00030 #include <iostream>
00031 
00032 using namespace std;
00033 
00034 namespace astl {
00035 
00036 template <class DFirstCursor>
00037 void language(ostream &out,                        // where to write
00038               DFirstCursor first,                  // start position
00039               DFirstCursor last = DFirstCursor())  // stop condition
00040 {
00041 #ifndef _MSC_VER
00042   vector<typename DFirstCursor::char_type> word;
00043 #else
00044   vector<DFirstCursor::char_type> word;
00045 #endif
00046   for(word.reserve(16); first != last; ) {
00047     word.push_back(first.letter());
00048     if (first.aim_final()) {
00049 #ifndef _MSC_VER
00050       for(typename vector<typename DFirstCursor::char_type>::const_iterator i =
00051 #else
00052             for(typename vector<DFirstCursor::char_type>::const_iterator i =
00053 #endif
00054                   word.begin(); i != word.end(); ++i)
00055             out << *i;
00056           out << endl;
00057           }
00058       while (!first.forward())  word.pop_back();
00059     }
00060   }
00061 
00062 template <typename DFirstCursor>
00063 class language_iterator
00064   : public iterator<input_iterator_tag,
00065                     basic_string<typename DFirstCursor::char_type> >
00066 {
00067 public:
00068   typedef basic_string<typename DFirstCursor::char_type> value_type;
00069 
00070 public:
00071   typedef language_iterator self;
00072 
00073   language_iterator()  // end of range
00074     : first(), last()
00075   { }
00076 
00077   language_iterator(const DFirstCursor &x)  // beginning of range
00078     : first(x), last()
00079   {
00080     current.reserve(128);
00081     if (first != last) {
00082       current += first.letter();
00083       if (!first.aim_final()) ++*this;
00084     }
00085   }
00086 
00087   const DFirstCursor& cursor() const { return first; }
00088 
00089   const value_type& operator*() const { return current; }
00090 
00091   const value_type* operator->() const { return &current; }
00092 
00093 
00094   bool operator==(const self &x) const { return first == x.first; }
00095 
00096   bool operator!= (const self &x) const { return first != x.first; }
00097 
00098   self& operator++ () {
00099     while (1)
00100       if (!first.forward())                      // ascending
00101         current.erase(current.size() - 1, 1);    // pop
00102       else
00103         if (first == last) return *this;
00104         else {
00105           current += first.letter();  // descending, push & test
00106           if (first.aim_final()) return *this;
00107         }
00108     return *this;
00109   }
00110 
00111 protected:
00112   value_type current;
00113   DFirstCursor first, last;
00114 };
00115 
00116 template <typename DFirstCursor>
00117 inline
00118 language_iterator<DFirstCursor> languagei(const DFirstCursor &x) {
00119   return language_iterator<DFirstCursor>(x);
00120 }
00121 
00122 /*
00123 TODO:
00124 template <typename DFA, typename InputIterator>
00125 inline
00126 typename DFA::state_type
00127 add_word(DFA &dfa, InputIterator first, InputIterator last)
00128 {
00129   ccursor<DFA> c(dfa);
00130   forward(c, first, last);
00131   c.src_final(true);
00132   return c.src();
00133 }
00134 
00135 template <typename DFA, typename InputIterator>
00136 inline
00137 typename DFA::state_type
00138 add_word(DFA &dfa, InputIterator first, InputIterator last,
00139          const typename DFA::tag_type &t)
00140 {
00141   typename DFA::state_type q = add_word(dfa, first, last);
00142   dfa.tag(q) = t;
00143   return q;
00144 }
00145 */
00146 
00147 template <typename DFA, typename InputIterator>
00148 inline
00149 typename DFA::state_type
00150 add_word(DFA &dfa, InputIterator first, InputIterator last,
00151          const typename DFA::tag_type &t = typename DFA::tag_type())
00152 {
00153   typename DFA::state_type q, i = dfa.initial();
00154   typename DFA::char_type a;
00155   if (i == DFA::null_state) {
00156     i = dfa.new_state();
00157     dfa.initial(i);
00158   }
00159   // for each letter in w:
00160   for (; !(first == last); ++first) {  
00161     a = *first;
00162     q = dfa.delta1(i, a);
00163     if (q == DFA::null_state) {  // transition does not exist ?
00164       q = dfa.new_state();
00165       dfa.set_trans(i, a, q);
00166     }
00167     i = q;
00168   }
00169   dfa.final(i) = true;
00170   dfa.tag(i) = t;
00171   return (i);
00172 }
00173 
00174 template <typename DFA, typename InputIterator>
00175 inline
00176 void add_words(DFA &dfa, InputIterator start, InputIterator finish)
00177 {
00178   // for each word in the sequence:
00179   for(; start != finish; ++start)
00180     add_word(dfa, (*start).begin(), (*start).end());
00181 }
00182 
00183 template <class ForwardI>
00184 class string_cursor : public forward_cursor_concept
00185 {
00186 protected:
00187   ForwardI start, finish;
00188   bool sink_state;
00189 
00190 public:
00191   typedef empty          tag_type;
00192   typedef ForwardI state_type;
00193   typedef string_cursor   self;
00194   typedef typename iterator_traits<ForwardI>::value_type char_type;
00195   typedef CHAR_TRAITS<char_type> char_traits;
00196 
00197   string_cursor(ForwardI start, ForwardI finish)
00198     : start(start), finish(finish), sink_state(false)
00199     { }
00200 
00201   bool sink() const { return sink_state; }
00202 
00203   void forward() { ++start; }
00204 
00205   bool forward(const char_type &letter) {
00206     sink_state = !(*start == letter);
00207     ++start;
00208     return !sink_state;
00209   }
00210 
00211   bool first() { return !(start == finish); }
00212 
00213   bool next() { return false; }
00214 
00215   char_type letter() const { return *start; }
00216 
00217   state_type src() const { return start; }
00218 
00219   state_type aim() const {
00220     ForwardI tmp = start;
00221     return ++tmp;
00222   }
00223 
00224   bool aim_final() const {
00225     ForwardI tmp = start;
00226     return ++tmp == finish;
00227   }
00228   
00229   bool src_final() const { return start == finish; }
00230 
00231   bool operator==(const self &x) const { return start == x.start; }
00232 
00233   bool find(const char_type &letter) { return exists(letter); }
00234 
00235   // Warning: x must be part of the [start, finish) sequence:
00236   self& operator=(const ForwardI &x) { 
00237     start = x;
00238     return *this;
00239   }
00240 
00241   bool exists(const char_type &letter) const { return *start == letter; }
00242 
00243   tag_type src_tag() const { return tag_type(); }
00244   tag_type aim_tag() const { return tag_type(); }
00245 };
00246 
00247 
00248 template <>
00249 class string_cursor<const char*> : public forward_cursor_concept
00250 {
00251 protected:
00252   const char *p;
00253 
00254 public:
00255   typedef plain         char_traits;
00256   typedef empty         tag_type;
00257   typedef const char*   state_type;
00258   typedef string_cursor self;
00259   typedef char          char_type;
00260   
00261   typedef tag_type       Tag;
00262   typedef state_type      State;
00263   typedef char_type Alphabet;
00264 
00265   string_cursor(const char *p_ = NULL)
00266     : p(p_)
00267     { }
00268 
00269   bool sink() const { return p == NULL; }
00270   
00271   bool exists(char letter) const { return *p == letter; }
00272 
00273   void forward() { ++p; }
00274 
00275   bool forward(char letter) {
00276     p = (*p == letter) ? p + 1 : NULL;
00277     return !sink();
00278   }
00279 
00280   bool first() { return *p != 0; }
00281 
00282   bool next() { return false; }
00283 
00284   char letter() const { return *p; }
00285 
00286   State src() const { return p; }
00287 
00288   State aim() const { return p + 1; }
00289 
00290   bool src_final() const { return *p == 0; }
00291 
00292   bool aim_final() const { return p[1] == 0; }
00293 
00294   bool operator==(const string_cursor<const char*> &x) const {
00295     return p == x.p;
00296   }
00297 
00298   bool find(char letter) { return exists(letter); }
00299   
00300   self& operator= (const char *c) {
00301     p = c;
00302     return *this;
00303   }
00304 
00305   tag_type src_tag() const { return tag_type(); }
00306   tag_type aim_tag() const { return tag_type(); }
00307 };
00308 
00309 template <typename InputIterator>
00310 string_cursor<InputIterator> stringc(InputIterator first, InputIterator last) {
00311   return string_cursor<InputIterator>(first, last);
00312 }
00313 
00314 inline
00315 string_cursor<const char*> stringc(const char *s) {
00316   return string_cursor<const char*>(s);
00317 }
00318 
00319 inline
00320 string_cursor<const char*> stringc(char *s) {
00321   return string_cursor<const char*>(s);
00322 }
00323 
00324 template <typename FA>
00325 class transition_iterator : public iterator<forward_iterator_tag,
00326   triple<typename FA::state_type, typename FA::char_type, 
00327   typename FA::state_type> >, public bfirst_cursor_concept
00328 {
00329 protected:
00330   const FA *A;
00331   typename FA::const_iterator q;
00332   typename FA::edges_type::const_iterator t;
00333 
00334 public:
00335   typedef transition_iterator     self;
00336   typedef typename FA::state_type state_type;
00337   typedef typename FA::char_type  char_type;
00338   typedef typename FA::tag_type   tag_type;
00339   typedef bfirst_cursor_concept   concept;
00340 
00341   transition_iterator()
00342   { }
00343 
00344   transition_iterator(const FA &a)
00345     : A(&a), q(a.begin()) {
00346     if (q != a.end()) {
00347       t = a.delta2(*q).begin();
00348       if (t == a.delta2(*q).end())
00349         while (++q != a.end()) {
00350           t = A->delta2(*q).begin();
00351           if (t != A->delta2(*q).end()) break;
00352         }
00353     }
00354   }
00355   
00356   transition_iterator(const FA &a, typename FA::const_iterator i)
00357     : A(&a), q(i) {
00358     if (q != a.end()) {
00359       t = a.delta2(*q).begin();
00360       if (t == a.delta2(*q).end())
00361         while (++q != a.end()) {
00362           t = A->delta2(*q).begin();
00363           if (t != A->delta2(*q).end()) break;
00364         }
00365     }
00366   }
00367 
00368   transition_iterator(typename FA::const_iterator i) // end of range
00369     : q(i)
00370   { }
00371 
00372   state_type sink_state() const {
00373     return FA::null_state;
00374   }
00375 
00376   triple<state_type, char_type, state_type> operator*() const {
00377     return make_triple(*q, t->first, t->second);
00378   }
00379 
00380 //  const triple<state_type, char_type, state_type>* operator->() const {
00381 //    return &triple<state_type, char_type, state_type>(*q, t->first, t->second);
00382 //  }
00383 
00384   self& operator++() {
00385     if (++t == A->delta2(*q).end())
00386       while (++q != A->end()) {
00387         t = A->delta2(*q).begin();
00388         if (t != A->delta2(*q).end()) break;
00389       }
00390     return *this;
00391   }
00392 
00393   self operator++(int) {
00394     transition_iterator tmp = *this;
00395     ++*this;
00396     return tmp;
00397   }
00398   
00399   state_type src() const { return *q; }
00400 
00401   state_type aim() const { return t->second; }
00402 
00403   const tag_type& src_tag() const { return A->tag(*q); }
00404 
00405   const tag_type& aim_tag() const { return A->tag(t->second); }
00406 
00407   bool src_final() const { return A->final(*q); }
00408 
00409   bool aim_final() const { return A->final(t->second); }
00410 
00411   char_type letter() const { return t->first; }
00412 
00413   bool next() {
00414     typename FA::const_iterator tmp = q;
00415     ++*this;
00416     return q == tmp;
00417   }
00418 
00419   bool operator==(const self &x) const { return q == x.q; }
00420 };
00421 
00422 template <typename FA>
00423 inline
00424 transition_iterator<FA> transitioni(const FA &a) {
00425   return transition_iterator<FA>(a);
00426 }
00427 
00428 template <typename FA>
00429 inline
00430 transition_iterator<FA> 
00431 transitioni(const FA &a, typename FA::const_iterator i) {
00432   return transition_iterator<FA>(a, i);
00433 }
00434 
00435 // create a path along its way if it does not exist already:
00436 template <typename DFA>
00437 class build_cursor : public cursor_concept
00438 {
00439 public:
00440   typedef typename DFA::state_type  state_type;
00441   typedef typename DFA::tag_type    tag_type;
00442   typedef typename DFA::char_type   char_type;
00443   typedef typename DFA::char_traits char_traits;
00444   typedef build_cursor              self;
00445 
00446   build_cursor()
00447   { }
00448 
00449   build_cursor(DFA &dfa)
00450     : A(&dfa) { 
00451     q = A->initial();
00452     if (q == DFA::null_state) {
00453       q = A->new_state();
00454       A->initial(q);
00455     }
00456   }
00457 
00458   build_cursor(DFA &dfa, state_type i)
00459     : A(&dfa), q(i) 
00460   { }
00461 
00462   self& operator=(state_type p) {
00463     if (p == DFA::null_state)
00464       p = A->new_state();
00465     q = p;
00466     return *this;
00467   }
00468 
00469   state_type      src() const        { return q; }
00470   bool            src_final() const  { return A->final(q); }
00471   void            src_final(bool b)  { A->final(q) = b; }
00472   bool            sink() const       { return src() == sink_state(); }
00473   state_type      sink_state() const { return DFA::null_state; }
00474   const tag_type& src_tag() const    { return A->tag(q); }
00475   tag_type&       src_tag()          { return A->tag(q); }
00476 
00477   bool exists(const char_type &c) const { 
00478     return A->delta1(q, c) != DFA::null_state; 
00479   }
00480 
00481   bool forward(const char_type &c) {
00482     state_type tmp = q;
00483     q = A->delta1(q, c);
00484     if (q == DFA::null_state) {
00485       q = A->new_state();
00486       A->set_trans(tmp, c, q);
00487     }
00488     return true;
00489   }    
00490 
00491   bool forward(const char_type &c, state_type a) {
00492     state_type tmp = q;
00493     q = A->delta1(q, c);
00494     if (q != a) {
00495       if (q == DFA::null_state)
00496         A->set_trans(tmp, c, a);
00497       else
00498         A->change_trans(tmp, c, a); // return false ?
00499       q = a;
00500     }
00501     return true;
00502   }    
00503 
00504   bool operator==(const self &x) const { return q == x.q; }
00505 
00506 protected:
00507   DFA *A;
00508   state_type q;
00509 };
00510 
00511 template <typename DFA>
00512 inline
00513 build_cursor<DFA> buildc(DFA &a) { return build_cursor<DFA>(a); }
00514 
00515 } // namespace astl
00516                 
00517 #endif // ASTL_LANGUAGE_ALGORITHMS

Generated on Sun Mar 8 02:41:34 2009 for ASTL by  doxygen 1.5.7.1