00001 #ifndef ASTL_DFA_MIN_HASH_H
00002 #define ASTL_DFA_MIN_HASH_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <astl.h>
00015 #include <hash.h>
00016 #include <match.h>
00017 #include <iostream>
00018 #include <set>
00019 #include <vector>
00020 #include <functional>
00021 #include <string>
00022 #include <cstring>
00023 #include <bitset>
00024 #include <iterator>
00025 #include <utility>
00026
00027 using namespace std;
00028
00029 namespace astl {
00030
00053 template <typename CharTraits = plain>
00054 class DFA_min_hash : public DFA_min<CharTraits, hash_tag>
00055 {
00056 public:
00057 typedef DFA_min<CharTraits, hash_tag> super;
00058 typedef DFA_min_hash self;
00059 typedef typename super::state_type state_type;
00060 typedef typename super::char_type char_type;
00061 typedef typename super::char_traits char_traits;
00062
00068 template <typename InputI>
00069 int hash_value(InputI first, InputI last) const {
00070 return astl::hash_value(forwardc(*this), first, last);
00071 }
00072
00077 int hash_value(const basic_string<char_type> &s) const {
00078 return hash_value(s.begin(), s.end());
00079 }
00080
00085 template <typename OutputI>
00086 int value_hash(int w, OutputI out) const {
00087 return value_hash(forwardc(*this), w, out);
00088 }
00089
00093 basic_string<char_type> value_hash(int w) const {
00094 basic_string<char_type> r;
00095 r.reserve(height(super::initial()));
00096 return value_hash(w, back_inserter(r));
00097 }
00098
00105 template <class ForwardI>
00106 bool insert(ForwardI, ForwardI);
00107
00113 bool insert(const basic_string<char_type> &s) {
00114 return insert(s.begin(), s.end());
00115 }
00116 };
00117
00118 template <class CharTraits>
00119 template <class ForwardI>
00120 bool DFA_min_hash<CharTraits>::insert(ForwardI first, ForwardI last)
00121 {
00122 {
00123 cursor<DFA_min_hash<CharTraits> > c(*this);
00124 if (match(c, first, last)) return false;
00125 }
00126 if (!super::ready) super::prepare();
00127 stack_cursor<forward_cursor<self> > c(forwardc(*this));
00128 bool duplicating;
00129
00130 for(duplicating = false; first != last; ++first) {
00131 if (!duplicating)
00132 pool(height(c.src())).erase(c.src());
00133
00134 if (!c.find(*first)) {
00135 set_trans(c.src(), *first, super::new_state());
00136 duplicating = true;
00137 c.find(*first);
00138 }
00139 else
00140 if (super::Q[c.aim()]->degree_in() > 1) {
00141 change_trans(c.src(), *first, duplicate_state(c.aim()));
00142 duplicating = true;
00143 c.find(*first);
00144 }
00145 c.forward();
00146 }
00147
00148 if (!duplicating)
00149 pool(height(c.src())).erase(c.src());
00150
00151 if (!final(c.src())) {
00152 tag(c.src()).wc() += 1;
00153 final(c.src()) = true;
00154 }
00155
00156 unsigned int h = height(c.src());
00157
00158 for(; c.backward(); ) {
00159
00160
00161
00162 pair<typename super::container::iterator, bool> i =
00163 pool(height(c.aim())).insert(c.aim());
00164 if (i.second == false) {
00165 state_type tmp = c.aim();
00166
00167 change_trans(c.src(), c.letter(), *i.first);
00168 tag(*i.first).wc() = tag(tmp).wc();
00169 del_state(tmp);
00170 }
00171 tag(c.src()).wc() += 1;
00172 h = max(height(c.src()), h+1);
00173 height(c.src(), h);
00174 }
00175 pool(height(super::initial())).insert(super::initial());
00176 ++super::wc_;
00177 return true;
00178 }
00179
00180
00181 template <typename Transition>
00182 inline
00183 bool operator<(const state<Transition, hash_tag> &x, const state<Transition, hash_tag> &y) {
00184 if (x.tag().wc() < y.tag().wc()) return true;
00185 if (x.tag().wc() == y.tag().wc()) {
00186 if (x.final() < y.final()) return true;
00187 if (x.final() == y.final()) {
00188 if (x.size() < y.size()) return true;
00189 else
00190 if (x.size() == y.size()) {
00191 typename state<Transition, hash_tag>::const_iterator i = x.begin(), j = x.end(), k = y.begin();
00192 while (i != j)
00193 if (*i < *k) return true;
00194 else if (*k < *i) return false;
00195 else { ++i; ++k; }
00196 return false;
00197 }
00198 }
00199 }
00200 return false;
00201 }
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225 }
00226
00227 #endif
00228
00229