00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef LILIAN_FA_COMPRESS_H
00023 #define LILIAN_FA_COMPRESS_H
00024
00025
00026 #include <concept.h>
00027 #include <alphabet.h>
00028 #include <tag.h>
00029 #include <tools.h>
00030
00031 #ifndef _MSC_VER
00032 #include <unistd.h>
00033 #include <sys/mman.h>
00034 #include <fcntl.h>
00035 #include <errno.h>
00036 #endif
00037
00038 #include <vector>
00039 #include <utility>
00040 #include <functional>
00041 #include <algorithm>
00042
00043 using namespace std;
00044
00045
00046
00047
00048
00049
00050 namespace astl {
00051
00052 template <typename CharTraits = plain, typename TagType = empty_tag>
00053 class FA_compress : public FA_concept
00054 {
00055 public:
00056 typedef CharTraits char_traits;
00057 typedef typename CharTraits::char_type char_type;
00058 typedef unsigned int state_type;
00059 typedef TagType tag_type;
00060 typedef FA_concept concept;
00061
00062 #ifndef _MSC_VER
00063 protected:
00064 #else
00065 public:
00066 #endif
00067 typedef mapable_vector<bool> set_F;
00068 typedef FA_compress self;
00069
00070 mapable_vector<TagType> tags;
00071 mapable_vector<char_type> letters;
00072 mapable_vector<state_type> aims_tags;
00073
00074 public:
00075 class const_iterator : public iterator<forward_iterator_tag, state_type>
00076 {
00077 protected:
00078 state_type state;
00079 const mapable_vector<char_type> *ml;
00080
00081 public:
00082 const_iterator() {}
00083 const_iterator(state_type x, const mapable_vector<char_type> &a)
00084 : state(x), ml(&a)
00085 { }
00086
00087 state_type operator*() const {
00088 return state;
00089 }
00090
00091 const_iterator& operator++() {
00092 while ((*ml)[++state] != (char_type) 0);
00093 return *this;
00094 }
00095
00096 const_iterator operator ++ (int) {
00097 const_iterator tmp = *this;
00098 ++(*this);
00099 return tmp;
00100 }
00101
00102 bool operator==(const const_iterator &x) const {
00103 return (state == x.state);
00104 }
00105 bool operator != (const const_iterator &x) const {
00106 return (!(*this == x));
00107 }
00108 };
00109
00110 class edges_type
00111 {
00112 private:
00113
00114
00115 state_type q;
00116 const FA_compress *fa;
00117
00118 public:
00119 edges_type()
00120 { }
00121
00122 edges_type(state_type s, const FA_compress &f)
00123
00124
00125
00126 : q(s + 1), fa(&f)
00127 { }
00128
00129 typedef char_type key_type;
00130 typedef pair<const key_type, state_type> value_type;
00131 typedef less<char_type> key_compare;
00132
00133 typedef value_type* pointer;
00134 typedef value_type& reference;
00135 typedef const value_type& const_reference;
00136 typedef state_type size_type;
00137 typedef int difference_type;
00138
00139 struct value_compare
00140 : public binary_function<value_type, value_type, bool>
00141 {
00142 key_compare comp;
00143 value_compare(key_compare c) : comp(c) { }
00144 bool operator ()(const value_type& x, const value_type& y) {
00145 return comp(x.first, y.first);
00146 }
00147 };
00148
00149 class const_iterator
00150 : iterator<bidirectional_iterator_tag, value_type>
00151 {
00152 protected:
00153 typename mapable_vector<char_type>::const_iterator a;
00154 typename mapable_vector<state_type>::const_iterator q;
00155
00156 public:
00157 typedef pair<char_type, state_type> value_type;
00158
00159 const_iterator()
00160 { }
00161
00162 const_iterator(typename mapable_vector<char_type>::const_iterator i,
00163 typename mapable_vector<state_type>::const_iterator j)
00164 : a(i), q(j)
00165 { }
00166
00167
00168 const_iterator(typename mapable_vector<char_type>::const_iterator i)
00169 : a(i)
00170 { }
00171
00172 bool operator==(const const_iterator &x) const {
00173 return (*a == (char_type) 0 && *x.a == (char_type) 0)
00174
00175 || (a == x.a && q == x.q);
00176 }
00177
00178 pair<char_type, state_type> operator *() const {
00179 return make_pair(*a, *q);
00180 }
00181
00182 const_iterator& operator ++() {
00183 ++a; ++q;
00184 return *this;
00185 }
00186
00187 const_iterator operator ++(int) {
00188 const_iterator tmp = (*this);
00189 ++(*this);
00190 return tmp;
00191 }
00192
00193 const_iterator& operator --() {
00194 --a; --q;
00195 return (*this);
00196 }
00197 const_iterator operator --(int) {
00198 const_iterator tmp = (*this);
00199 --(*this);
00200 return tmp;
00201 }
00202 };
00203
00204
00205
00206 key_compare key_comp() const {
00207 key_compare tmp;
00208 return (tmp);
00209 }
00210
00211 value_compare value_comp() const {
00212 return value_compare(key_comp());
00213 }
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225 const_iterator begin() const {
00226 return const_iterator(fa->letters.begin() + q, fa->aims_tags.begin() + q);
00227
00228 }
00229
00230 const_iterator end() const {
00231 return const_iterator(fa->letters.end() - 1);
00232
00233 }
00234
00235
00236
00237
00238
00239
00240
00241
00242 bool empty() const {
00243 return fa->letters[q] == (char_type) 0;
00244
00245 }
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267 const_iterator lower_bound(const key_type &k) const {
00268 const_iterator j;
00269 for(j = begin(); j != end() && (*j).first < k; ++j);
00270 return j;
00271 }
00272
00273
00274 const_iterator upper_bound(const key_type &k) const {
00275 const_iterator j;
00276 for(j = begin(); j != end() && !(k < (*j).first); ++j);
00277 return j;
00278 }
00279
00280 pair<const_iterator, const_iterator> equal_range(const key_type &k) const
00281 {
00282 return make_pair(lower_bound(k), upper_bound(k));
00283 }
00284 };
00285
00286
00287 protected:
00288 state_type initial_;
00289 set<state_type> I;
00290 unsigned int _state_count;
00291 unsigned int _trans_count;
00292
00293 int fd;
00294 set_F F;
00295
00296 public:
00297 #ifndef _MSC_VER
00298 static const state_type null_state = 0;
00299 #else
00300 static const state_type null_state;
00301 #endif
00302
00303 bool write(const string &path) {
00304 ofstream out(path.c_str(), ios::out | ios::binary);
00305 if (out && write(out)) {
00306 out.close();
00307 return true;
00308 }
00309 return false;
00310 }
00311
00312 bool write(ostream &out) const
00313 {
00314 out.write((const char *) &initial_, sizeof(initial_));
00315 out.write((const char *) &_state_count, sizeof(_state_count));
00316 out.write((const char *) &_trans_count, sizeof(_trans_count));
00317 letters.write(out);
00318 aims_tags.write(out);
00319 tags.write(out);
00320 F.write(out);
00321 return (bool) out;
00322 }
00323
00324 bool read(const string &path) {
00325 ifstream in(path.c_str(), ios::in | ios::binary);
00326 if (in && read(in)) {
00327 in.close();
00328 return true;
00329 }
00330 return false;
00331 }
00332
00333 bool read(istream &in)
00334 {
00335 in.read((char *)&initial_, sizeof(initial_));
00336 in.read((char *)&_state_count, sizeof(_state_count));
00337 in.read((char *)&_trans_count, sizeof(_trans_count));
00338 I.clear();
00339 I.insert(initial_);
00340 return letters.read(in) && aims_tags.read(in) &&
00341 tags.read(in) && F.read(in);
00342 }
00343
00344 #ifndef _MSC_VER
00345 bool mmap(const string &path)
00346 {
00347 fd = open(path.c_str(), O_RDONLY);
00348 if (fd == -1) return false;
00349 return mmap(fd);
00350 }
00351
00352 bool mmap(int f)
00353 {
00354 ::read(f, (void*) &initial_, sizeof(initial_));
00355 ::read(f, (void*) &_state_count, sizeof(_state_count));
00356 ::read(f, (void*) &_trans_count, sizeof(_trans_count));
00357 I.clear();
00358 I.insert(initial_);
00359 return letters.mmap(f) && aims_tags.mmap(f)
00360 && tags.mmap(f) && F.mmap(f);
00361 }
00362
00363 void munmap()
00364 {
00365 initial_ = null_state;
00366 I.clear();
00367 letters.munmap();
00368 aims_tags.munmap();
00369 tags.munmap();
00370 F.munmap();
00371 }
00372 #endif
00373
00374 state_type initial() const {
00375 return initial_;
00376 }
00377
00378 const set<state_type>& initials() const {
00379 return I;
00380 }
00381
00382 set_F::const_reference final(state_type s) const {
00383 return F[s];
00384 }
00385
00386 state_type delta1(state_type s, char_type l) const
00387 {
00388 typename mapable_vector<char_type>::const_iterator a
00389 = letters.begin() + s + 1;
00390 for(; *a != (char_type) 0 && *a != l; ++a);
00391 return *a == (char_type) 0 ?
00392 null_state : aims_tags[a - letters.begin()];
00393 }
00394
00395 template <typename OutputIterator>
00396 OutputIterator delta1(state_type q, char_type c, OutputIterator x) const
00397 {
00398 typename mapable_vector<char_type>::const_iterator a
00399 = letters.begin() + q + 1;
00400 for(; *a != (char_type) 0 && *a != c; ++a);
00401 for(; *a != (char_type) 0 && *a == c; ++a, ++x)
00402 *x = aims_tags[a - letters.begin()];
00403 return x;
00404 }
00405
00406 unsigned int state_count() const {
00407 return _state_count;
00408 }
00409
00410 unsigned int trans_count() const {
00411 return _trans_count;
00412 }
00413
00414 const tag_type& tag(state_type s) const {
00415 return tags[aims_tags[s]];
00416 }
00417
00418
00419 tag_type& tag(state_type s) {
00420 return tags[aims_tags[s]];
00421 }
00422
00423 edges_type delta2(state_type s) const {
00424 return edges_type(s, *this);
00425
00426 }
00427
00428 const_iterator begin() const
00429 {
00430 return state_count() > 0 ?
00431 const_iterator((state_type) 1, letters) : end();
00432 }
00433
00434 const_iterator end() const {
00435 return const_iterator(letters.size() - 1, letters);
00436 }
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451 vector<char_type> del_trans(state_type q, state_type p) {
00452 typename mapable_vector<char_type>::iterator last
00453 = letters.begin() + q + 1;
00454 typename mapable_vector<char_type>::iterator first = last;
00455 for(; *last != (char_type) 0; ++last);
00456 vector<char_type> v;
00457 v.reserve(128);
00458 for(++last; *first != '\0';) {
00459 if (aims_tags[first - letters.begin()] == p) {
00460 v.push_back(*first);
00461 std::rotate(aims_tags.begin() + (first - letters.begin()),
00462 aims_tags.begin() + (first - letters.begin()) + 1,
00463 aims_tags.begin() + (last - letters.begin()) - 1);
00464
00465 std::rotate(first, first + 1, last);
00466 }
00467 else ++first;
00468 }
00469 return v;
00470 }
00471
00472 ~FA_compress() {
00473 #ifndef _MSC_VER
00474 letters.munmap();
00475 aims_tags.munmap();
00476 tags.munmap();
00477 F.munmap();
00478 if (fd > 0) close(fd);
00479 #endif
00480 }
00481
00482 FA_compress()
00483 : initial_(null_state), fd(0) {
00484 I.insert(initial_);
00485 }
00486
00487 FA_compress(istream &in)
00488 : initial_(null_state), fd(0) {
00489 read(in);
00490 }
00491
00492 void clear() {
00493 letters.clear();
00494 aims_tags.clear();
00495 tags.clear();
00496 F.clear();
00497 fd = 0;
00498 initial_ = null_state;
00499 I.clear();
00500 I.insert(initial_);
00501 }
00502
00503 template <typename BFirstCursor>
00504 void compress(BFirstCursor i, bool tag_compression = true) {
00505 compress(i, BFirstCursor(), tag_compression);
00506 }
00507
00508 template <typename BFirstCursor>
00509 void compress(BFirstCursor i, BFirstCursor j, bool tag_compression = true) {
00510 clear();
00511 if (i == j) return;
00512 tags.reserve(8192);
00513 letters.reserve(8192);
00514 letters.push_back((char_type) 0);
00515 aims_tags.reserve(8192);
00516 aims_tags.push_back(0);
00517 F.reserve(8192);
00518 F.push_back(false);
00519 #ifndef _MSC_VER
00520 typedef typename BFirstCursor::state_type bstate_type;
00521 #else
00522 typedef BFirstCursor::state_type bstate_type;
00523 #endif
00524 map<bstate_type, safe<state_type> > old2new;
00525 vector<bstate_type> new2old;
00526 new2old.push_back(bstate_type());
00527 bstate_type ii = i.src();
00528 #ifndef _MSC_VER
00529 map<bstate_type, pair<typename BFirstCursor::tag_type, bool> > sinks;
00530 #else
00531 map<bstate_type, pair<BFirstCursor::tag_type, bool> > sinks;
00532 #endif
00533
00534 while (i != j) {
00535 safe<state_type> &q = old2new[i.src()];
00536 if (q == null_state) {
00537 sinks.erase(i.src());
00538 q = letters.size();
00539 letters.push_back((char_type) 0);
00540 new2old.push_back(bstate_type());
00541 F.push_back(i.src_final());
00542 typename mapable_vector<tag_type>::iterator k = tags.end();
00543 tag_type tmp_tag;
00544 tmp_tag = i.src_tag();
00545 if (tag_compression)
00546 k = find(tags.begin(), tags.end(), tmp_tag);
00547
00548 if (k == tags.end()) {
00549 tags.push_back(tmp_tag);
00550 k = tags.end() - 1;
00551 }
00552
00553 aims_tags.push_back(k - tags.begin());
00554
00555 set<pair<char_type, bstate_type> > tmp;
00556 do {
00557 tmp.insert(make_pair(i.letter(), i.aim()));
00558 safe<state_type> &p = old2new[i.aim()];
00559 if (p == null_state)
00560
00561 sinks.insert(make_pair(i.aim(), make_pair(i.aim_tag(),
00562 i.aim_final())));
00563 }
00564 while (i.next());
00565
00566 for(typename set<pair<char_type, bstate_type> >::const_iterator jj
00567 = tmp.begin(); jj != tmp.end(); ++jj) {
00568 letters.push_back(jj->first);
00569 new2old.push_back(jj->second);
00570 F.push_back(false);
00571 aims_tags.push_back(0);
00572 }
00573 }
00574 else
00575 while (i.next());
00576 }
00577
00578 for(unsigned int k = 1; k < new2old.size(); ++k)
00579 if (letters[k] != (char_type) 0) {
00580 safe<state_type> &p = old2new[new2old[k]];
00581 if (p == null_state) {
00582
00583 p = letters.size();
00584 letters.push_back((char_type) 0);
00585 new2old.push_back(bstate_type());
00586 #ifndef _MSC_VER
00587 pair<typename BFirstCursor::tag_type, bool> pp = sinks[new2old[k]];
00588 #else
00589 pair<BFirstCursor::tag_type, bool> pp = sinks[new2old[k]];
00590 #endif
00591 F.push_back(pp.second);
00592 typename mapable_vector<tag_type>::iterator k = tags.end();
00593 tag_type tmp;
00594 tmp = pp.first;
00595 if (tag_compression)
00596 k = find(tags.begin(), tags.end(), tmp);
00597 if (k == tags.end()) {
00598 tags.push_back(tmp);
00599 k = tags.end() - 1;
00600 }
00601 aims_tags.push_back(k - tags.begin());
00602 }
00603 aims_tags[k] = p;
00604 }
00605
00606 letters.push_back((char_type) 0);
00607 I.clear();
00608
00609
00610
00611
00612
00613
00614
00615 initial_ = old2new[ii];
00616 I.insert(initial_);
00617 _state_count = old2new.size();
00618 _trans_count = letters.size() - _state_count - 2;
00619 }
00620 };
00621
00622 #ifndef _MSC_VER
00623 template <typename T, typename U>
00624 const typename FA_compress<T, U>::state_type
00625 FA_compress<T, U>::null_state;
00626 #else
00627 template <typename T, typename U>
00628 const typename FA_compress<T, U>::state_type
00629 FA_compress<T, U>::null_state = 0;
00630 #endif
00631
00632 }
00633
00634 #endif // ASTL_DFA_COMPACT_H
00635