00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef ASTL_DFA_COMPACT_H
00023 #define ASTL_DFA_COMPACT_H
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _MSC_VER
00033 #include <unistd.h>
00034 #include <sys/mman.h>
00035 #include <fcntl.h>
00036 #include <errno.h>
00037 #endif
00038
00039 #include <vector>
00040 #include <utility>
00041 #include <functional>
00042 #include <iostream>
00043
00044 using namespace std;
00045
00046 namespace astl {
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064 template <class DFA, class tag_mapping = identity<typename DFA::tag_type> >
00065 class DFA_compact : public DFA_concept
00066 {
00067 public:
00068 typedef typename DFA::char_traits char_traits;
00069 typedef typename DFA::char_type char_type;
00070 typedef unsigned long state_type;
00071 typedef typename tag_mapping::result_type tag_type;
00072
00073 private:
00074 typedef mapable_vector<int>::size_type size_type;
00075 typedef mapable_vector<bool> set_F;
00076 typedef DFA_compact self;
00077
00078 public:
00079 class const_iterator : public iterator<forward_iterator_tag, state_type>
00080 {
00081 friend class DFA_compact<DFA, tag_mapping>;
00082 private:
00083 state_type state;
00084 const mapable_vector<char_type> *ml;
00085 const mapable_vector<state_type> *ma;
00086
00087 const_iterator(state_type x, const mapable_vector<char_type> *a,
00088 const mapable_vector<state_type> *s)
00089 : state(x), ml(a), ma(s) { }
00090
00091 public:
00092
00093 const_iterator() : ml(NULL), ma(NULL) { }
00094 state_type operator * () const { return (state); }
00095 const_iterator& operator ++ ()
00096 {
00097 for (; state < ml->size(); ++state) {
00098 if ((*ml)[state] == (char_type) 0 && (*ma)[state] != 0) {
00099
00100 ++state;
00101 break;
00102 }
00103 }
00104 return (*this);
00105 }
00106 const_iterator operator ++ (int)
00107 {
00108 const_iterator tmp = *this;
00109 ++(*this);
00110 return (tmp);
00111 }
00112 const_iterator& operator = (const const_iterator &x)
00113 {
00114 state = x.state;
00115 ml = x.ml;
00116 ma = x.ma;
00117 return (*this);
00118 }
00119 bool operator == (const const_iterator &x) const {
00120 return (state == x.state);
00121 }
00122 bool operator != (const const_iterator &x) const {
00123 return (!(*this == x));
00124 }
00125 };
00126
00127 class edges_type
00128 {
00129 private:
00130 state_type s;
00131 const mapable_vector<char_type> *ml;
00132 const mapable_vector<state_type> *ma;
00133
00134 public:
00135 edges_type(state_type st, const mapable_vector<char_type> &l,
00136 const mapable_vector<state_type> &a)
00137 : s(st), ml(&l), ma(&a) { }
00138
00139 typedef char_type key_type;
00140 typedef pair<const key_type, state_type> value_type;
00141 typedef less<char_type> key_compare;
00142
00143 typedef value_type* pointer;
00144 typedef value_type& reference;
00145 typedef const value_type& const_reference;
00146 typedef state_type size_type;
00147 typedef typename DFA::edges_type::difference_type difference_type;
00148
00149 class value_compare : public binary_function<value_type, value_type, bool>
00150 {
00151 friend class edges_type;
00152
00153 protected:
00154 key_compare comp;
00155 value_compare(key_compare c) : comp(c) { }
00156
00157 public:
00158 bool operator () (const value_type& x, const value_type& y) {
00159 return (comp(x.first, y.first));
00160 }
00161 };
00162
00163 class const_iterator : std::iterator<bidirectional_iterator_tag, value_type>
00164 {
00165 public:
00166 const_iterator(const char_type *l,
00167 const state_type *a, size_type pos_ = 0)
00168 : ml(l), ma(a), pos(pos_) { }
00169
00170 typedef pair<char_type, state_type> value_type;
00171
00172 const_iterator() { }
00173 const_iterator(const const_iterator& i)
00174 : ml(i.ml), ma(i.ma), pos(i.pos)
00175 { }
00176
00177 bool operator == (const const_iterator &x) const {
00178 return ml == x.ml;
00179 }
00180
00181 value_type operator * () const {
00182 return typename const_iterator::value_type(*ml, *ma);
00183 }
00184
00185 const_iterator& operator ++ ()
00186 {
00187 for (++pos, ++ml, ++ma; pos < self::char_traits::size;
00188 ++pos, ++ml, ++ma)
00189 if (*ml != (char_type) 0 &&
00190 (size_t) self::char_traits::to_int_type(*ml) == pos)
00191 break;
00192 return (*this);
00193 }
00194 const_iterator operator ++ (int)
00195 {
00196 const_iterator tmp = (*this);
00197 ++(*this);
00198 return tmp;
00199 }
00200 const_iterator& operator -- ()
00201 {
00202 for (--pos, --ml, --ma; pos != 0; --pos, --ml, --ma)
00203 if (self::char_traits::to_int_type(*ml) == pos)
00204 break;
00205 return (*this);
00206 }
00207 const_iterator operator -- (int)
00208 {
00209 const_iterator tmp = (*this);
00210 --(*this);
00211 return tmp;
00212 }
00213
00214 protected:
00215 const char_type *ml;
00216 const state_type *ma;
00217 size_type pos;
00218 };
00219
00220
00221
00222 typedef const_iterator iterator;
00223
00224 edges_type() : s((state_type) 0), ml(NULL), ma(NULL) { }
00225
00226 key_compare key_comp() const
00227 {
00228 key_compare tmp;
00229 return tmp;
00230 }
00231
00232 value_compare value_comp() const { return (value_compare(key_comp())); }
00233
00234 state_type operator [] (const key_type& k) const
00235 {
00236 state_type q = s + self::char_traits::to_int_type(k);
00237 if ((*ml)[q] == k) return ((*ma)[q]);
00238 return (state_type) 0;
00239 }
00240
00241 const_iterator begin() const {
00242 const_iterator r(ml->begin() + s, ma->begin() + s, 0);
00243 return ++r;
00244 }
00245
00246 const_iterator end() const {
00247 return const_iterator(ml->begin() + s + self::char_traits::size,
00248 ma->begin() + s + self::char_traits::size,
00249 self::char_traits::size);
00250 }
00251
00252 size_type size() const
00253 {
00254 typename self::char_traits::int_type offset;
00255 typename self::char_traits::int_type result = 0;
00256
00257 for (offset = 0; offset < self::char_traits::size; ++offset)
00258 if ((*ml)[s + offset] != (char_type) 0
00259 && self::char_traits::to_int_type((*ml)[s + offset]) == offset)
00260 ++result;
00261
00262 return result;
00263 }
00264
00265 bool empty() const { return size() == 0; }
00266
00267 const_iterator find(const key_type &k) const
00268 {
00269 long mapped = self::char_traits::to_int_type(k);
00270 state_type result = s + mapped;
00271 if ((*ml)[result] == k)
00272 return const_iterator(ml->begin() + result, ma->begin() + result, mapped);
00273 return end();
00274 }
00275
00276 size_type count(const key_type& k) const {
00277 return (*ml)[s + self::char_traits::to_int_type(k)] == k ? 1 : 0;
00278 }
00279
00280 iterator lower_bound(const key_type &k) const { return find(k); }
00281
00282 iterator upper_bound(const key_type &k) const {
00283 iterator tmp = find(k);
00284 return tmp == end() ? tmp : ++tmp;
00285 }
00286
00287 pair<iterator, iterator> equal_range(const key_type &k) const
00288 {
00289 return make_pair(lower_bound(k), upper_bound(k));
00290 }
00291 };
00292
00293
00294 protected:
00295 mapable_vector<char_type> letters;
00296 mapable_vector<state_type> aims;
00297 mapable_vector<tag_type> tags;
00298 state_type I;
00299 unsigned long _state_count;
00300 unsigned long _trans_count;
00301
00302 #ifndef _MSC_VER
00303 int fd;
00304 #endif
00305 set_F F;
00306
00307 bool free_cell(state_type s) const {
00308 return (letters[s] == (char_type) 0 && aims[s] == (state_type) 0);
00309 }
00310
00311 bool tagged_cell(state_type s) const {
00312 return (letters[s] == (char_type) 0 && aims[s] != (state_type) 0);
00313 }
00314
00315 public:
00316 static const state_type null_state = 0;
00317
00318 ostream& write(ostream &out) const
00319 {
00320 out.write((const char *) &I, sizeof(I));
00321 out.write((const char *) &_state_count, sizeof(_state_count));
00322 out.write((const char *) &_trans_count, sizeof(_trans_count));
00323 letters.write(out);
00324 aims.write(out);
00325 tags.write(out);
00326 F.write(out);
00327 return out;
00328 }
00329
00330 istream& read(istream &in)
00331 {
00332 in.read((char *)&I, sizeof(I));
00333 in.read((char *)&_state_count, sizeof(_state_count));
00334 in.read((char *)&_trans_count, sizeof(_trans_count));
00335 letters.read(in);
00336 aims.read(in);
00337 tags.read(in);
00338 F.read(in);
00339 return in;
00340 }
00341
00342 #ifndef _MSC_VER
00343
00344 bool mmap(const string &path)
00345 {
00346 fd = open(path.c_str(), O_RDONLY);
00347 if (fd == -1) return false;
00348 return mmap(fd);
00349 }
00350
00351 bool mmap(int f)
00352 {
00353 ::read(f, (void*) &I, sizeof(I));
00354 ::read(f, (void*) &_state_count, sizeof(_state_count));
00355 ::read(f, (void*) &_trans_count, sizeof(_trans_count));
00356 if (!letters.mmap(f) || !aims.mmap(f) || !tags.mmap(f) || !F.mmap(f))
00357 return false;
00358 return true;
00359 }
00360
00361 void munmap()
00362 {
00363 letters.munmap();
00364 aims.munmap();
00365 tags.munmap();
00366 F.munmap();
00367 }
00368 #endif
00369
00370 state_type initial() const {
00371 return I;
00372 }
00373
00374 set_F::const_reference final(state_type s) const {
00375 return F[s];
00376 }
00377
00378 state_type delta1(state_type s, const char_type &l) const
00379 {
00380 state_type q = s + self::char_traits::to_int_type(l);
00381 if (letters[q] == l) return aims[q];
00382 return null_state;
00383 }
00384
00385 unsigned long state_count() const {
00386 return _state_count;
00387 }
00388
00389 unsigned long trans_count() const {
00390 return _trans_count;
00391 }
00392
00393 const tag_type& tag(state_type s) const {
00394 return tags[aims[s - 1]];
00395 }
00396
00397 edges_type delta2(state_type s) const {
00398 return edges_type(s, letters, aims);
00399 }
00400
00401 const_iterator begin() const
00402 {
00403 if (state_count() > 0)
00404 return const_iterator((state_type) 1, &letters, &aims);
00405 else
00406 return end();
00407 }
00408
00409 const_iterator end() const {
00410 return const_iterator(letters.size(), &letters, &aims);
00411 }
00412
00413 float density() const
00414 {
00415 unsigned long occupied_cells = 0;
00416 state_type i, fin = letters.size();
00417
00418 for (i = 0; i != fin; ++i)
00419 if (!free_cell(i))
00420 ++occupied_cells;
00421
00422 return ((float) occupied_cells / (float) fin);
00423 }
00424
00425 void stats(ostream &out = cout) const
00426 {
00427 out << "Matrix size : " << letters.size() << " cells ("
00428 << aims.size() * sizeof(state_type) + letters.size()
00429 * sizeof(char_type) + tags.size() * sizeof(tag_type) +
00430 F.size() * sizeof(bool) << " bytes)" << endl;
00431 out << "Matrix density : " << density() << endl;
00432 out << "States : " << state_count()
00433 << " (" << aims.size() * sizeof(state_type)
00434 << " bytes)" << endl;
00435 out << "Transitions : " << trans_count() << " ("
00436 << letters.size() * sizeof(char_type)
00437 << " bytes)" << endl;
00438 out << "Tags : " << tags.size() << " ("
00439 << tags.size() * sizeof(tag_type)
00440 << " bytes)" << endl;
00441 out << "Final states bool : " << F.size() << " ("
00442 << F.size() * sizeof(bool)
00443 << " bytes)" << endl;
00444 out << "Space waste ratio : " << (1. - density()) * 100. << "%" << endl;
00445 }
00446
00447 ~DFA_compact() {
00448 #ifndef _MSC_VER
00449 letters.munmap();
00450 aims.munmap();
00451 tags.munmap();
00452 F.munmap();
00453 if (fd > 0) close(fd);
00454 #endif
00455 }
00456
00457
00458 DFA_compact()
00459 #ifndef _MSC_VER
00460 : fd(0)
00461 #endif
00462 {
00463 I = null_state;
00464 }
00465
00466 DFA_compact(istream &in)
00467 #ifndef _MSC_VER
00468 : fd(0)
00469 #endif
00470 {
00471 I = null_state;
00472 read(in);
00473 }
00474
00475 DFA_compact(const DFA &dfa, long opt_value = 2)
00476 : letters((unsigned int) (dfa.state_count() * 2 + dfa.trans_count())),
00477 aims((unsigned int) (dfa.state_count() * 2 + dfa.trans_count())),
00478 tags((unsigned int) 1), I(null_state), _state_count(0), _trans_count(0),
00479 #ifndef _MSC_VER
00480 fd(0),
00481 #endif
00482 F((unsigned int) dfa.state_count())
00483 {
00484 if (dfa.state_count() > 0) {
00485 tag_mapping map_tag;
00486 typename DFA::const_iterator i, end_i = dfa.end();
00487 vector<state_type> old2new(dfa.state_count());
00488 state_type pos, solid_first_free = 0UL, first_free = 0UL;
00489 typename DFA::edges_type::const_iterator trans, edg_end;
00490 long count = 0;
00491
00492 for (i = dfa.begin(); i != end_i; ++i) {
00493 ++count;
00494 ++_state_count;
00495
00496 if (count == opt_value) {
00497 for(++solid_first_free; solid_first_free < letters.size() &&
00498 !free_cell(solid_first_free); ++solid_first_free);
00499 count = 0;
00500 }
00501
00502 while (solid_first_free + self::char_traits::size
00503 >= letters.size()) {
00504
00505 letters.resize(2 * letters.size(), (char_type) 0);
00506 aims.resize(2 * aims.size(), (state_type) 0);
00507 }
00508
00509 first_free = solid_first_free;
00510
00511 while (1) {
00512 pos = first_free + 1;
00513 typename DFA::edges_type edg = dfa.delta2(*i);
00514 edg_end = edg.end();
00515
00516
00517 for (trans = edg.begin(); trans != edg_end; ++trans)
00518 if (!free_cell(pos + self::char_traits::to_int_type((*trans).first)))
00519 break;
00520
00521 if (trans == edg_end) {
00522
00523 if (old2new.size() <= (unsigned long) *i)
00524 old2new.resize((unsigned long) (*i) * 2, null_state);
00525 old2new[(unsigned long) *i] = pos;
00526
00527
00528 tag_type t_tmp = map_tag(dfa.tag(*i));
00529 typename mapable_vector<tag_type>::iterator position
00530 = find(tags.begin() + 1, tags.end(), t_tmp);
00531 aims[pos - 1] = (state_type) (position - tags.begin());
00532 if (position == tags.end())
00533 tags.push_back(t_tmp);
00534
00535
00536 if (dfa.final(*i)) {
00537 if (F.size() <= pos) F.resize(pos * 2, false);
00538 F[pos] = true;
00539 }
00540
00541
00542 unsigned long mapped_letter;
00543 for (trans = edg.begin(); trans != edg_end; ++trans) {
00544 mapped_letter =
00545 self::char_traits::to_int_type((*trans).first);
00546 letters[pos + mapped_letter] = (*trans).first;
00547 aims[pos + mapped_letter] =
00548 (state_type) (*trans).second;
00549 ++_trans_count;
00550 }
00551 if (first_free == solid_first_free)
00552 count = opt_value - 1;
00553 break;
00554 }
00555
00556
00557 for(++first_free; first_free < letters.size() &&
00558 !free_cell(first_free); ++first_free);
00559
00560 while (first_free + self::char_traits::size >=
00561 letters.size()) {
00562
00563 letters.resize(2 * letters.size(), (char_type) 0);
00564 aims.resize(2 * aims.size(), (state_type) 0);
00565 }
00566 }
00567 }
00568
00569
00570 state_type last_one = 0;
00571 for (pos = 0; pos != letters.size(); ++pos)
00572 if (letters[pos] != (char_type) 0)
00573 aims[pos] = old2new[aims[pos]];
00574 else
00575 if (aims[pos] != 0)
00576 last_one = pos + 1 + self::char_traits::size;
00577
00578 if (dfa.initial() != dfa.null_state)
00579 I = old2new[(unsigned long) dfa.initial()];
00580 letters.resize(last_one + 1, (char_type) 0);
00581 aims.resize(last_one + 1, (state_type) 0);
00582 F.resize(last_one + 1, false);
00583 }
00584 }
00585 };
00586
00587 template <typename DFA, typename TagMapping>
00588 const typename DFA_compact<DFA, TagMapping>::state_type
00589 DFA_compact<DFA, TagMapping>::null_state;
00590
00591 }
00592
00593 #endif // DFA_COMPACT_H
00594
00595
00596
00597
00598