tools.h

00001 /*
00002  * ASTL - the Automaton Standard Template Library.
00003  * C++ generic components for Finite State Automata handling.
00004  * Copyright (C) 2000-2003 Vincent Le Maout (vincent.lemaout@chello.fr).
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #ifndef ASTL_TOOLS_H
00023 #define ASTL_TOOLS_H
00024 
00025 #ifdef _MSC_VER
00026 // don't understand this WC++ warning, let's kill it:
00027 #pragma warning( disable : 4800 ) // forcing value to bool true or false
00028 #endif
00029 
00030 
00031 #include <memory>
00032 #include <utility>
00033 #include <iostream>
00034 #include <fstream>
00035 #include <string>
00036 #include <algorithm>
00037 #include <vector>
00038 #include <list>
00039 #include <set>
00040 #include <bitset>
00041 #include <map>
00042 #include <iterator>
00043 #include <cstdio>
00044 #include <ctime>
00045 #include <cwchar>
00046 #include <functional>
00047 
00048 #ifndef _MSC_VER
00049 #include <unistd.h>   // getopt
00050 #include <sys/mman.h>
00051 #include <fcntl.h>
00052 #define POPEN popen
00053 #define PCLOSE pclose
00054 #define max__ max
00055 #define min__ min
00056 #else
00057 #include <cstdio>
00058 #define POPEN _popen
00059 #define PCLOSE _pclose
00060 #define max__ _MAX
00061 #define min__ _MIN
00062 #endif
00063 
00064 using namespace std;
00065 
00066 namespace astl {
00067 
00068 #if 0
00069   class multiple_horspool_finder
00070   {
00071     multiple_horspool_finder()
00072     { }
00073 
00074     multiple_horspool_finder(const vector<string> &s) {
00075       init(s);
00076     }
00077     
00078     void init(const vector<string> &s) {
00079       finders.clear();
00080       finders.reserve(s.size());
00081       for(vector<string>::const_iterator i = s.begin(); i != s.end(); ++i)
00082         finders.push_back(horspool_finder(*i));
00083     }
00084 
00085     // returns beg of match
00086     pair<const char*, const char*> find(const char *first, const char *last) const {
00087       while (true) {
00088         int shift = numeric_limits<int>::max();
00089         int match = 0;
00090         for(vector<horspool_finder>::const_iterator finder = 0; finder != finders.size(); ++finder) {
00091           const char *finish = first + finder->size() - 1;
00092           if (finish < last) {
00093             const int s = finder->step(finish, last);
00094             if (s < 0) 
00095               match = max(match, finder->size());
00096             else shift = min(shift, s);
00097           }
00098         }
00099         if (match != 0)
00100           return make_pair(first, first + finder->size());
00101         if (shift == numeric_limits<int>::max())
00102           break;
00103         first += shift;
00104       }
00105       return last; // no match
00106     }
00107 
00108     /*  
00109     fill(positions.begin(), positions.end(), first);
00110       int candidate = -1;
00111       for(vector<horspool_finder*>::size_type finder = 0; finder != finders.size(); ++finder) 
00112         if (positions[finder] < last - finders[finder]->size()) {
00113           const int shift = step(positions[finder], last);
00114           if (shift < 0) {
00115             if (candidate == -1 || positions[finder] < positions[candidate] ||
00116                 positions[finder] == positions[candidate] && finders[finder]->size() < finders[candidate]->size())
00117               candidate = finder;
00118           }
00119           else positions[finder] += shift;
00120         }
00121       if (candidate != -1)
00122         return positions[candidate];
00123     }
00124     */
00125 
00126   protected:
00127     vector<horspool_finder> finders;
00128   };
00129 #endif
00130 
00131   class horspool_finder
00132   {
00133   public:
00134     horspool_finder()
00135     { }
00136 
00137 
00138     horspool_finder(const std::string &s) {
00139       init(s);
00140     }
00141 
00142 
00143     void init(const std::string &s) {
00144       word = s;
00145       last_position = --word.end();
00146       shift.clear();
00147       shift.resize(256, s.size());
00148       shifts();
00149     }
00150 
00151 
00152 #if 0
00153     horspool_finder(const std::vector<bitset<256> > &s) {
00154       init(s);
00155     }
00156 
00157 
00158     void init(const std::string &s) {
00159       word.resize(s.size());
00160       for(std::string::size_type i = 0; i < s.size(); ++i)
00161         word[i].set((unsigned char) s[i]);
00162       init(word);
00163     }
00164 
00165     void init(const std::vector<bitset<256> > &s) {
00166       word = s;
00167       last_position = --word.end();
00168       shift.clear();
00169       shift.resize(256, s.size());
00170       shifts();
00171     }
00172 #endif
00173     unsigned int size() const { return word.size(); }
00174 
00175     std::string::const_iterator find(const std::string &s) const {
00176       return find(s.begin(), s.end());
00177     }
00178 
00179 //     const char* find(const char *text, char end) const {
00180 //       for(text = std::find(text, text + size() - 1, end); 
00181 //        *text != end; 
00182 //        text = std::find(text, text + shift[(unsigned char) *text], end)) {
00183 //      const char *x = text;
00184 //      for(std::string::const_iterator y = last_position; *x == *y; --x, --y)
00185 //        if (y == word.begin())
00186 //          return text - (size() - 1);
00187 //       }
00188 //       return text;
00189 //     }
00190 
00191 
00192     // returns beg of match
00193     template <typename RandomAccessI>
00194     RandomAccessI find(RandomAccessI first, RandomAccessI last) const {
00195       for(first += size() - 1; first < last; first += shift[(unsigned char) *first]) {
00196         RandomAccessI x = first;
00197         for(std::string::const_iterator y = last_position; *x == *y; --x, --y)
00198           if (y == word.begin())
00199             return first - (size() - 1);
00200       }
00201       return last;
00202     }
00203 
00204 #if 0
00205     // returns beg of match
00206     template <typename RandomAccessI>
00207     RandomAccessI find(RandomAccessI first, RandomAccessI last) const {
00208       for(first += size() - 1; first < last; first += shift[(unsigned char) *first]) {
00209         RandomAccessI x = first;
00210         for(std::vector<bitset<256> >::const_iterator y = last_position; y->test((unsigned char) *x); --x, --y)
00211           if (y == word.begin())
00212             return first - (size() - 1);
00213       }
00214       return last;
00215     }
00216 #endif
00217 //     template <typename RandomAccessI>
00218 //     int step(RandomAccessI first, RandomAccessI last) const {
00219 //       const int s = shift[(unsigned char) *first]; 
00220 //       for(std::string::const_iterator y = last_position; *first == *y; --first, --y)
00221 //      if (y == word.begin())
00222 //        return -s;
00223 //       return s;
00224 //     }
00225 
00226   protected:
00227 
00228 
00229     std::string word;
00230     std::vector<int> shift;
00231     std::string::const_iterator last_position;
00232 
00233 
00234 #if 0
00235     std::vector<bitset<256> > word;
00236     std::vector<int> shift;
00237     std::vector<bitset<256> >::const_iterator last_position;
00238 #endif
00239 
00240     void shifts() {
00241       for (std::string::size_type i = 0; i < word.size() - 1; ++i)
00242         shift[(unsigned char) word[i]] = word.size() - 1 - i;
00243     }
00244 
00245 #if 0
00246     void shifts() {
00247       for(std::string::size_type i = 0; i < word.size() - 1; ++i)
00248         for(int j = 0; j < 256; ++j)
00249           if (word[i].test(j))
00250             shift[j] = word.size() - 1 - i;
00251     }
00252 #endif
00253 
00254   };
00255 
00256   class boyer_moore_finder
00257   {
00258   public:
00259     boyer_moore_finder()
00260     { }
00261 
00262     boyer_moore_finder(const std::string &s) { 
00263       init(s);
00264     }
00265 
00266     void init(const std::string &s) {
00267       word = s;
00268       bmGs.resize(s.size());
00269       bmBc.clear();
00270       bmBc.resize(256, s.size());
00271       good_suffix_shifts();
00272       bad_character_shifts();
00273     }
00274 
00275     unsigned int size() const { return word.size(); }
00276 
00277     std::string::const_iterator find(const std::string &s) const {
00278       return find(s.begin(), s.end());
00279     }
00280 
00281     // returns beg of match
00282     template <typename RandomAccessI>
00283     RandomAccessI find(RandomAccessI first, RandomAccessI last) const {
00284       const int m = size() - 1;
00285       last -= m;
00286       int i;
00287       while (first < last) {
00288         for (i = m; i >= 0 && word[i] == first[i]; --i);
00289         if (i < 0)
00290           return first;
00291         else 
00292           first += std::max(bmGs[i], bmBc[(unsigned char) first[i]] - m + i);
00293       }
00294       return last + m;
00295     }
00296 
00297   protected:
00298     std::string word;
00299     std::vector<int> bmGs, bmBc;
00300 
00301     void bad_character_shifts() {
00302       for (std::string::size_type i = 0; i < word.size() - 1; ++i)
00303         bmBc[(unsigned char) word[i]] = word.size() - i - 1;
00304     }
00305  
00306     void suffixes(std::vector<int>& suff) {
00307       const int m = (int) word.size();
00308       int f = 0, g, i;    
00309       suff[m - 1] = m;
00310       g = m - 1;
00311       for (i = m - 2; i >= 0; --i) {
00312         if (i > g && suff[i + m - 1 - f] < i - g)
00313           suff[i] = suff[i + m - 1 - f];
00314         else {
00315           if (i < g)
00316             g = i;
00317           f = i;
00318           while (g >= 0 && word[g] == word[g + m - 1 - f])
00319             --g;
00320           suff[i] = f - g;
00321         }
00322       }
00323     }
00324 
00325     void good_suffix_shifts() {
00326       const int m = (int) word.size();
00327       int i, j;
00328       std::vector<int> suff(m);
00329       suffixes(suff);
00330       for (i = 0; i < m; ++i)
00331         bmGs[i] = m;
00332       j = 0;
00333       for (i = m - 1; i >= 0; --i)
00334         if (suff[i] == i + 1)
00335           for (; j < m - 1 - i; ++j)
00336             if (bmGs[j] == m)
00337               bmGs[j] = m - 1 - i;
00338       for (i = 0; i <= m - 2; ++i)
00339         bmGs[m - 1 - suff[i]] = m - 1 - i;
00340     }
00341   };
00342 
00343   // Split a line into a (possibly empty) vector of (possibly empty) strings
00344   inline
00345   int cut(const string &line, vector<string> &fields, char sep = '\t')
00346   {
00347     fields.clear();
00348     string::size_type j;
00349     for(string::size_type i = 0; i < line.size(); i = j + 1) {
00350       j = line.find(sep, i);
00351       if (j == string::npos) j = line.size();
00352       fields.push_back(line.substr(i, j - i));
00353     }
00354     return (int) fields.size();
00355   }
00356 
00357   inline
00358   int cut(const wstring &line, vector<wstring> &fields, wchar_t sep = L'\t')
00359   {
00360     fields.clear();
00361     wstring::size_type j;
00362     for(wstring::size_type i = 0; i < line.size(); i = j + 1) {
00363       j = line.find(sep, i);
00364       if (j == wstring::npos) j = line.size();
00365       fields.push_back(line.substr(i, j - i));
00366     }
00367     return (int) fields.size();
00368   }
00369 
00370   inline
00371   int cut(const char *line, vector<string> &fields, char sep = '\t')
00372   {
00373     fields.clear();
00374     const char *j = line;
00375     const char *i = line;
00376     do {
00377       if (*i == sep || (*i == 0 && i != line)) {
00378         fields.push_back(string(j, i));
00379         j = i + 1;
00380       }
00381     } while (*i++ != '\0');
00382     return (int) fields.size();
00383   }
00384 
00385   // Define class smart pointer with following properties:
00386   // 1. A smart pointer allocates automatically the pointed object
00387   // 2. A smart pointer shares the pointed object with its copies
00388   // 3. A smart pointer deallocates the pointed object when no other
00389   //    smart pointer points to the object
00390   // 4. operator== returns true if both pointers point to the same
00391   //    object
00392 
00393   template <typename T>
00394   class smart_ptr
00395   {
00396   protected:
00397     struct object
00398     {
00399       T data;
00400       unsigned int count;
00401 
00402       object()
00403         : count(1)
00404       { }
00405     };
00406 
00407     object *p;
00408 
00409   public:
00410     typedef smart_ptr self;
00411 
00412     smart_ptr()
00413       : p(new object)
00414     { }
00415 
00416     smart_ptr(const self &x)
00417       : p(x.p) {
00418       ++p->count;
00419     }
00420 
00421     ~smart_ptr() { if (--p->count == 0) delete p; }
00422 
00423     self& operator=(const self &x) {
00424       if (p != x.p) {
00425         if (--p->count == 0) delete p;
00426         p = x.p;
00427         ++p->count;
00428       }
00429       return *this;
00430     }
00431 
00432     void reset() {
00433       this->~smart_ptr<T>();
00434       p = new object;
00435     }
00436 
00437     bool     operator==(const self &x) const { return p == x.p;  }
00438     T&       operator*() { return p->data; }
00439     const T& operator*() const { return p->data; }
00440     T*       operator->() { return &(p->data);  }
00441     const T* operator->() const { return &(p->data); }
00442   };
00443 
00444 
00445   // Make sure a C++ builtin data type is constructed with a default value:
00446   // #ifndef _MSC_VER
00447   template <class T, T value = (T) 0>
00448   // #else
00449   // template <class T>
00450   // #endif
00451   struct safe
00452   {
00453     T q;
00454     safe(T p)
00455       : q(p)
00456     { }
00457 
00458     // #ifndef _MSC_VER
00459     safe()
00460       : q(value)
00461     { }
00462     // #else
00463     //   safe()
00464     //     : q(0)
00465     //   { }
00466     // #endif
00467 
00468     operator T () const { return q; }
00469     operator T ()       { return q; }
00470 
00471     safe& operator=(T p) {
00472       q = p;
00473       return *this;
00474     }
00475 
00476     safe& operator+=(T p) {
00477       q += p;
00478       return *this;
00479     }
00480 
00481     safe& operator++() {
00482       ++q;
00483       return *this;
00484     }
00485 
00486     //   safe& operator+=(int i) {
00487     //     q += i;
00488     //     return *this;
00489     //   }
00490 
00491     safe operator+(safe &x) const {
00492       return safe(q + x.q);
00493     }
00494 
00495     bool operator<(safe &x) const {
00496       return q < x.q;
00497     }
00498 
00499     safe& operator--() {
00500       --q;
00501       return *this;
00502     }
00503   };
00504 
00505   // Equivalent to popen(command, "r") (RTFM)
00506   // remove the last \n if any
00507   static inline string execute(const string &command)
00508   {
00509     FILE *f = POPEN(command.c_str(), "r");
00510     if (f == NULL) return "";
00511     string r;
00512     size_t n;
00513     for(char buffer[1024];
00514         (n = fread(buffer, sizeof(char), 1023, f)) > 0;
00515         r += buffer)
00516       buffer[n] = '\0';
00517     PCLOSE(f);
00518     if (r[r.size() - 1] == '\n')
00519       r.erase(r.size() - 1);
00520     return r;
00521   }
00522 
00523   // template <typename T>
00524   // class mapable_vector
00525   // {
00526   // protected:
00527   //   off_t offset; // offset in file for mmap
00528   //   T   *first, *last, *eos;
00529   //   ostream *output;
00530 
00531   // public:
00532   //   typedef size_t   size_type;
00533   //   typedef T        value_type;
00534   //   typedef T*       iterator;
00535   //   typedef const T* const_iterator;
00536   //   typedef T&       reference;
00537   //   typedef const T& const_reference;
00538   //   typedef mapable_vector self;
00539 
00540   //   mapable_vector()
00541   //     : offset(0), first(NULL), last(NULL), eos(NULL), output(NULL) 
00542   //   { }
00543 
00544   //   mapable_vector(ostream &out) // push back direct to disk
00545   //     : offset(sizeof(size_type)), first(NULL), last(NULL), eos(NULL), output(&out) { 
00546   //     size_type dummy = ~0; // reserved for size
00547   //     output->write((const char*) &dummy, sizeof(size_type));
00548   //   }
00549 
00550   //   mapable_vector(const self &x)
00551   //     : offset(0), first(NULL), last(NULL), eos(NULL), output(x.output) {
00552   //     if (x.first != NULL) {
00553   //       first = new T[x.capacity()];
00554   //       last = copy(x.first, x.last, first);
00555   //       eos = first + x.capacity();
00556   //     }
00557   //   }
00558 
00559   //   mapable_vector(size_type s)
00560   //     : offset(0), output(NULL) {
00561   //     first = new T[s];
00562   //     last = eos = first + s;
00563   //     memset(first, 0, s * sizeof(T));
00564   //   }
00565 
00566   //   void clear() {
00567   // #ifndef _MSC_VER
00568   //     if (mapped())
00569   //       munmap();
00570   //     else
00571   // #endif
00572   //       {
00573   //    delete [] first;
00574   //    first = last = eos = NULL;
00575   //       }
00576   //   }
00577 
00578   //   self& operator=(const self &x) {
00579   //     if (&x != this) {
00580   //       clear();  // set first to NULL
00581   //       if (!x.empty()) first = new T[x.size()];
00582   //       last = eos = first + x.size();
00583   //       copy(x.first, x.last, first);
00584   //     }
00585   //     return *this;
00586   //   }
00587 
00588   //   size_type size() const { return last - first; }
00589 
00590   //   bool empty() const { return size() == 0; }
00591 
00592   //   T& back() { return last[-1]; }
00593 
00594   //   const T& back() const { return last[-1]; }
00595 
00596   //   T& front() { return *first; }
00597 
00598   //   const T& front() const { return *first; }
00599 
00600   //   T& operator[](size_type s) { return first[s]; }
00601 
00602   //   const T& operator[](size_type s) const { return first[s]; }
00603 
00604   //   bool write(ostream& out) const {
00605   //     size_type s = size();
00606   //     out.write((const char*) &s, sizeof(size_type));
00607   //     out.write((const char*) first, sizeof(T) * s);
00608   //     return (bool) out;
00609   //   }
00610 
00611   //   bool read(istream& in) {
00612   //     size_type s;
00613   //     in.read((char*) &s, sizeof(size_type));
00614   //     if (in.gcount() != sizeof(size_type)) return false;
00615   //     clear();
00616   //     if (s > 0)
00617   //       first = new T[s];
00618   //     else first = 0;
00619   //     last = eos = first + s;
00620   //     in.read((char*) first, sizeof(T) * s);
00621   //     return (size_type) in.gcount() == sizeof(T) * s;
00622   //   }
00623 
00624   // #ifndef _MSC_VER
00625   //   bool mmap(int file_desc) {
00626   //     off_t off = lseek(file_desc, 0, SEEK_CUR);
00627   //     delete [] first;
00628   //     size_type p;
00629   //     ::read(file_desc, &p, sizeof(size_type));
00630   //     off += sizeof(size_type);
00631   //     offset = off % getpagesize();
00632   //     char *ptr = (char*) ::mmap(0, p * sizeof(T),
00633   //                           PROT_READ, MAP_PRIVATE, file_desc, off - offset);
00634   //     if (ptr == (char*) -1) return false;
00635   //     first = (T*) (ptr + offset);
00636   //     last = eos = first + p;
00637   //     lseek(file_desc, sizeof(T) * p, SEEK_CUR);
00638   //     return true;
00639   //   }
00640 
00641   //   void munmap() {
00642   //     if (mapped()) {
00643   //       ::munmap(((char*) first) - offset, size() * sizeof(T));
00644   //       first = last = eos = NULL;
00645   //       offset = -1;
00646   //     }
00647   //   }
00648   // #endif
00649 
00650   //   size_type capacity() const { return eos - first; }
00651 
00652   //   void resize(size_type s, const T& x) {
00653   //     if (s < size()) erase(begin() + s, end());
00654   //     else insert(end(), s - size(), x);
00655   //   }
00656 
00657   //   void erase(iterator i, iterator j) {
00658   //     copy(j, last, first);
00659   //     last -= j - i;
00660   //   }
00661 
00662   //   void insert(iterator i, size_type s, const T& x = T()) {
00663   //     if (s + size() > capacity()) {
00664   //       T *p = new T[s + size()];
00665   //       copy(first, i, p);
00666   //       copy_backward(i, last, p + (i - first) + s);
00667   //       i = p + (i - first);
00668   //       last = p + size() + s;
00669   //       delete [] first;
00670   //       first = p;
00671   //       eos = last;
00672   //     }
00673   //     else {
00674   //       copy_backward(i, last, i + s);
00675   //       last += s;
00676   //     }
00677   //     fill(i, i + s, x);
00678   //   }
00679 
00680   //   template <typename InputI>
00681   //   void push_back(InputI first, InputI last) {
00682   //     for(; first != last; ++first)
00683   //       push_back(*first);
00684   //   }
00685 
00686   //   void finalize() {
00687   //     // UGLY HACK:
00688   //     size_type s = offset;
00689   //     output->seekp(-offset, ios_base::cur);
00690   //     output->write((const char*)&s, sizeof(s));
00691   //     output->seekp(offset - sizeof(s), ios_base::cur);
00692   //   }
00693 
00694   //   iterator begin() { return first; }
00695 
00696   //   iterator end() { return last; }
00697 
00698   //   const_iterator begin() const { return first; }
00699 
00700   //   const_iterator end() const { return last; }
00701 
00702   //   void push_back(const T& x) {
00703   //     if (output != NULL) {
00704   //       output->write((const char*) &x, sizeof(x));
00705   //       offset += sizeof(x);
00706   //     }
00707   //     else {
00708   //       if (last == eos) {
00709   //    T *p = new T[max__((size_type) 1, size()) * 2];
00710   //    copy(first, last, p);
00711   //    delete [] first;
00712   //    eos = p + 2 * max__((size_type) 1, size());
00713   //    last = p + (last - first);
00714   //    first = p;
00715   //       }
00716   //       *last++ = x;
00717   //     }
00718   //   }
00719 
00720   //   void reserve(size_type i) {
00721   //     if (i > capacity()) {
00722   //       T *p = new T[i];
00723   //       copy(first, last, p);
00724   //       delete [] first;
00725   //       last = p + (last - first);
00726   //       first = p;
00727   //       eos = first + i;
00728   //     }
00729   //   }
00730 
00731   //   bool mapped() const { return offset != -1; }
00732 
00733   //   ~mapable_vector() { clear(); }
00734   // };
00735 
00736   // // A compact table maps integers to static arrays of element of type T
00737   // // To use the read/write functionalities, T must be a POD type
00738   // // For example, say t is a compact table, the sequence #4 is defined
00739   // // by the range t[4],t[5] or t.range(4)
00740   // // a mapped compact table is a static object
00741   // template <typename T, typename SizeType = unsigned int>
00742   // class compact_table
00743   // {
00744   // protected:
00745   //   mapable_vector<T>        data;
00746   //   mapable_vector<SizeType> index;
00747   //   int fd;          // file descriptor for memory mapping
00748   //   ostream *output; // push_back "direct to file"
00749 
00750   //   bool mapped() const { return fd > -1; }
00751 
00752   // public:
00753   //   typedef SizeType size_type;
00754   //   typedef compact_table<T> self;
00755 
00756   //   compact_table(ostream &out) 
00757   //     : data(out), fd(-1), output(&out) {
00758   //     index.push_back(0);
00759   //   }
00760 
00761   //   compact_table()
00762   //     : fd(-1), output(NULL) {
00763   //     index.push_back(0);
00764   //   }
00765 
00766   //   compact_table(const self &x)
00767   //     : data(x.data), index(x.index), fd(-1), output(x.output) {
00768   //   }
00769 
00770   //   void reserve(size_type s) { index.reserve(s); }
00771 
00772   //   ~compact_table() {
00773   // #ifndef _MSC_VER
00774   //     munmap();
00775   // #endif
00776   //   }
00777 
00778   //   void clear() {
00779   // #ifndef _MSC_VER
00780   //     munmap();
00781   // #endif
00782   //   }
00783 
00784   //   size_type size() const { return index.size() - 1; }
00785 
00786   //   bool empty() const { return size() == 0; }
00787 
00788   //   bool mmap(const string &path) {
00789   //     fd = open(path.c_str(), O_RDONLY);
00790   //     return fd != -1 && mmap(fd);
00791   //   }
00792 
00793   //   bool mmap(int f) {
00794   //     return data.mmap(f) && index.mmap(f);
00795   //   }
00796 
00797   // #ifndef _MSC_VER
00798   //   void munmap() {
00799   //     if (fd != -1) {
00800   //       data.munmap();
00801   //       index.munmap();
00802   //       close(fd);
00803   //       fd = -1;
00804   //     }
00805   //   }
00806   // #endif
00807 
00808   //   bool read(istream& i) {
00809   //     fd = -1;
00810   //     return data.read(i) && index.read(i);
00811   //   }
00812 
00813   //   bool write(ostream& o) const {
00814   //     return data.write(o) && index.write(o);
00815   //   }
00816 
00817   //   const T* operator[](size_type i) const { return &data[index[i]]; }
00818 
00819   //   pair<const T*, const T*> range(size_type i) const {
00820   //     return make_pair((*this)[i], (*this)[i + 1]);
00821   //   }
00822 
00823   //   template <typename OutputIterator>
00824   //   OutputIterator find_and_write(size_type i, OutputIterator x) const {
00825   //     return copy((*this)[i], (*this)[i + 1] - 1, x);
00826   //   }
00827 
00828   //   size_type capacity() const { return data.size(); }
00829 
00830   //   template <typename RandomAccessIterator>
00831   //   void push_back(RandomAccessIterator start, RandomAccessIterator finish)
00832   //   {
00833   //     data.push_back(start, finish);
00834   //     index.push_back(index.back() + (finish - start));
00835   //   }
00836 
00837   // public:
00838   //   bool finalize() {
00839   //     data.finalize();
00840   //     return index.write(*output);
00841   //   }
00842 
00843   //   class const_iterator : public iterator<random_access_iterator_tag,
00844   //                                     pair<const T*, const T*> >
00845   //   {
00846   //   public:
00847   //     const_iterator(typename mapable_vector<SizeType>::const_iterator x,
00848   //               const mapable_vector<T> *d)
00849   //       : i(x), data(d)
00850   //     { }
00851 
00852   //     pair<const T*, const T*> operator*() const {
00853   //       return make_pair(&(*data)[i[0]], &(*data)[i[1]]);
00854   //     }
00855 
00856   //     const_iterator& operator++() {
00857   //       ++i;
00858   //       return *this;
00859   //     }
00860 
00861   //     const_iterator operator++(int) {
00862   //       const_iterator tmp = *this;
00863   //       ++*this;
00864   //       return tmp;
00865   //     }
00866 
00867   //     bool operator==(const const_iterator &x) const { return i == x.i; }
00868 
00869   //   protected:
00870   //     typename mapable_vector<SizeType>::const_iterator i;
00871   //     const mapable_vector<T> *data;
00872   //   };
00873 
00874   //   const_iterator begin() const {
00875   //     return const_iterator(index.begin(), data);
00876   //   }
00877 
00878   //   const_iterator end() const {
00879   //     return const_iterator(index.end() - 1, data);
00880   //   }
00881   // };
00882 
00883   // EQUIVALENT WIN32 DE MMAP
00884   // typedef struct
00885   //  {
00886   //    // This receives a pointer within the current process at which the 
00887   //    // shared memory is located.
00888   //    // The same shared memory may reside at different addresses in other
00889   //    // processes which share it.
00890   //        void *      location;
00891   //        HANDLE      hFileMapping;
00892   // }mem_shared_struct, *mem_shared, *token;
00893 
00894   // mem_shared_struct   *token;
00895 
00896   // if ((*token = (mem_shared) malloc(sizeof(mem_shared_struct))) == NULL)     
00897   //      return RC_NOT_ENOUGH_MEMORY;
00898      
00899   // if (newmode == new_shared_create)
00900   //    (*token)->hFileMapping = CreateFileMapping((HANDLE) 0xFFFFFFFF, NULL,
00901   //                  PAGE_READWRITE,
00902   //                  0,
00903   //                 (DWORD) size,
00904   //                  (LPSTR) name);
00905   //  else
00906   //       (*token)->hFileMapping = OpenFileMapping(FILE_MAP_ALL_ACCESS,
00907   //                  FALSE,
00908   //                  (LPSTR) name);
00909   // if ((*token)->hFileMapping == NULL)
00910   // {
00911   //       free( *token );
00912   //       return RC_SHM_NOT_CREATED );
00913   // }
00914 
00915   // (*token)->location = MapViewOfFile((*token)->hFileMapping,
00916   //                                       FILE_MAP_READ | FILE_MAP_WRITE, 
00917   //                                       0, 0, 0);
00918   // if ((*token)->location == NULL)
00919   // {
00920   //       CloseHandle((*token)->hFileMapping);
00921   //             free(*token);
00922   //             return RC_OBJECT_NOT_CREATED;
00923   // }
00924 
00925   // A memory mappable vector used by DFA_compact. It does not cope with
00926   // the standard (minimal interface). Use with caution, once mapped, a
00927   // vector only supports read-only operations.
00928   // Type requirements: T shall be a POD type.
00929 
00930   template <typename T>
00931   class mapable_vector
00932   {
00933   protected:
00934     bool mapped;
00935     T   *first, *last, *eos;
00936 
00937   public:
00938     typedef unsigned int size_type;
00939     typedef T        value_type;
00940     typedef T*       iterator;
00941     typedef const T* const_iterator;
00942     typedef T&       reference;
00943     typedef const T& const_reference;
00944     typedef mapable_vector self;
00945 
00946     mapable_vector()
00947       : mapped(false), first(NULL), last(NULL), eos(NULL)
00948     { }
00949 
00950     mapable_vector(const self &x)
00951       : mapped(false), first(NULL), last(NULL), eos(NULL) {
00952       if (!x.empty()) {
00953         first = new T[x.capacity()];
00954         last = copy(x.first, x.last, first);
00955         eos = first + x.capacity();
00956       }
00957     }
00958 
00959     mapable_vector(size_type s)
00960       : mapped(false), first(NULL), last(NULL), eos(NULL) {
00961       if (s > 0) {
00962         first = new T[s];
00963         last = eos = first + s;
00964         memset(first, 0, s * sizeof(T));
00965       }
00966     }
00967 
00968     template <typename RandomAccessI>
00969     mapable_vector(RandomAccessI start, RandomAccessI finish)
00970       : mapped(false), first(finish - start > 0 ? new T[finish - start] : (T*) 0), 
00971         last(first + (finish - start)), eos(last) {
00972       copy(start, finish, first);
00973     }
00974 
00975     void swap(self &x) {
00976       std::swap(mapped, x.mapped);
00977       std::swap(first, x.first);
00978       std::swap(last, x.last);
00979       std::swap(eos, x.eos);
00980     }
00981 
00982     void clear() {
00983 #ifndef _MSC_VER
00984       if (mapped)
00985         munmap();
00986       else
00987 #endif
00988         {
00989           delete [] first;
00990           first = last = eos = NULL;
00991         }
00992     }
00993 
00994     self& operator=(const self &x) {
00995       if (&x != this) {
00996         clear();  // set first to NULL
00997         if (!x.empty()) first = new T[x.size()];
00998         last = eos = first + x.size();
00999         copy(x.first, x.last, first);
01000       }
01001       return *this;
01002     }
01003 
01004     size_type size() const     { return last - first; }
01005     size_type capacity() const { return eos - first; }
01006     bool      empty() const    { return size() == 0; }
01007 
01008     T&        back()        { return last[-1]; }
01009     const T&  back() const  { return last[-1]; }
01010 
01011     T&        front()       { return *first; }
01012     const T&  front() const { return *first; }
01013 
01014     iterator       begin()       { return first; }
01015     const_iterator begin() const { return first; }
01016 
01017     iterator       end()       { return last; }
01018     const_iterator end() const { return last; }
01019 
01020     T&        operator[](size_type s)       { return first[s]; }
01021     const T&  operator[](size_type s) const { return first[s]; }
01022 
01023     bool write(const string &path) {
01024       ofstream output(path.c_str(), ios::out | ios::binary);
01025       return output && write(output);
01026     }
01027 
01028     bool write(ostream& out) const {
01029       size_type s = size();
01030       out.write((const char*) &s, sizeof(s));
01031       out.write((const char*) first, sizeof(T) * s);
01032       return (bool) out;
01033     }
01034 
01035     bool read(const string &path) {
01036       ifstream input(path.c_str(), ios::in | ios::binary);
01037       return input && read(input);
01038     }
01039 
01040     bool read(istream& in) {
01041       size_type s;
01042       in.read((char*) &s, sizeof(s));
01043       if (in.gcount() != sizeof(s)) return false;
01044       clear();
01045       if (s > 0)
01046         first = new T[s];
01047       else first = 0;
01048       last = eos = first + s;
01049       in.read((char*) first, sizeof(T) * s);
01050       return (bool) in;
01051     }
01052 
01053 #ifndef _MSC_VER
01054     bool mmap(int file_desc) {
01055       delete [] first;
01056       size_type p;
01057       ::read(file_desc, &p, sizeof(size_type));
01058       first = (T*) ::mmap(0, p * sizeof(T),
01059                           PROT_READ, MAP_FILE | MAP_PRIVATE, file_desc, 0);
01060       if (first == (T*) -1) return false;
01061       last = eos = first + p;
01062       lseek(file_desc, p * sizeof(T), SEEK_CUR);
01063       mapped = true;
01064       return true;
01065     }
01066 
01067     void munmap() {
01068       if (mapped) {
01069         ::munmap((char*) first, size() * sizeof(T));
01070         first = last = eos = NULL;
01071         mapped = false;
01072       }
01073     }
01074 #endif
01075 
01076     void resize(size_type s, const T& x = T()) {
01077       if (s < size()) erase(begin() + s, end());
01078       else insert(end(), s - size(), x);
01079     }
01080 
01081     void erase(iterator i, iterator j) {
01082       copy(j, last, first);
01083       last -= j - i;
01084     }
01085 
01086     void insert(iterator i, size_type s, const T& x = T()) {
01087       if (s + size() > capacity()) {
01088         T *p = new T[(s + size()) * 2];
01089         copy(first, i, p);
01090         copy_backward(i, last, p + size() + s);
01091         i = p + (i - first);
01092         eos = p + (s + size()) * 2;
01093         last = p + s + size();
01094         delete [] first;
01095         first = p;
01096       }
01097       else {
01098         copy_backward(i, last, last + s);
01099         last += s;
01100       }
01101       fill(i, i + s, x);
01102     }
01103 
01104     void push_back(const T& x) {
01105       if (last == eos) {
01106         T *p = new T[max__((size_type) 1, size()) * 2];
01107         copy(first, last, p);
01108         delete [] first;
01109         eos = p + 2 * max__((size_type) 1, size());
01110         last = p + (last - first);
01111         first = p;
01112       }
01113       *last++ = x;
01114     }
01115 
01116     void reserve(size_type i) {
01117       if (i > capacity()) {
01118         T *p = new T[i];
01119         copy(first, last, p);
01120         delete [] first;
01121         last = p + (last - first);
01122         first = p;
01123         eos = first + i;
01124       }
01125     }
01126 
01127     ~mapable_vector() { clear(); }
01128   };
01129 
01130   // A compact table maps integers to static arrays of element of type T
01131   // To use the read/write functionalities, T must be a POD type
01132   // For example, say t is a compact table, the sequence #4 is defined
01133   // by the range t[4],t[5] or t.range(4)
01134   // a mapped compact table is a static object
01135   template <typename T, typename SizeType = unsigned int>
01136   class compact_table
01137   {
01138   protected:
01139     mapable_vector<SizeType> index;
01140     T *data, *eos;  // data and end-of-storage
01141     int fd;         // file descriptor for memory mapping
01142 
01143     bool mapped() const {
01144       return fd > -1;
01145     }
01146 
01147     void mapped(bool b) {
01148       if (b && fd < 0) fd = 0;
01149       else fd = -1;
01150     }
01151 
01152   public:
01153     typedef SizeType size_type;
01154     typedef compact_table<T> self;
01155     //typedef typename
01156     //  mapable_vector<typename mapable_vector<T>::size_type>::size_type size_type;
01157 
01158     compact_table()
01159       : index(1), data(NULL), eos(NULL), fd(-1)
01160     { }
01161 
01162     compact_table(const self &x)
01163       : index(x.index), data(NULL), eos(NULL), fd(-1) {
01164       if (x.eos - x.data > 0) {
01165         data = new T[x.eos - x.data];
01166         eos = copy(x.data, x.eos, data);
01167       }
01168     }
01169 
01170     void reserve(size_type s) {
01171       index.reserve(s);
01172     }
01173 
01174     ~compact_table() {
01175 #ifndef _MSC_VER
01176       if (mapped()) munmap();
01177       else
01178 #endif
01179         delete [] data;
01180     }
01181 
01182     void clear()
01183     {
01184 #ifndef _MSC_VER
01185       if (mapped()) munmap();
01186       else
01187 #endif
01188         {
01189           delete [] data;
01190           data = eos = NULL;
01191           index.clear();
01192           index.push_back(0);
01193         }
01194     }
01195 
01196     size_type size() const {
01197       return index.size() - 1;
01198     }
01199 
01200     bool empty() const {
01201       return size() == 0;
01202     }
01203 
01204     bool mmap(const string &path) {
01205       fd = open(path.c_str(), O_RDONLY);
01206       if (fd == -1) return false;
01207       return mmap(fd);
01208     }
01209 
01210     bool mmap(int f) {
01211       mapped(true);
01212       if (index.mmap(f)) {
01213         data = (T*) ::mmap(0, index.back() * sizeof(T),
01214                            PROT_READ, MAP_FILE | MAP_PRIVATE, f, 0);
01215         if (data == (T*) -1) return false;
01216         eos = data + index.back();
01217         return true;
01218       }
01219       return false;
01220     }
01221 
01222 #ifndef _MSC_VER
01223     void munmap() {
01224       index.munmap();
01225       if (fd > 0) close(fd);
01226       mapped(false);
01227     }
01228 #endif
01229 
01230     bool read(istream& i) {
01231       mapped(false);
01232       if (!index.read(i)) return false;
01233       data = new T[index.back()];
01234       i.read((char*) data, index.back() * sizeof(T));
01235       eos = data + index.back();
01236       return (size_type) i.gcount() == index.back() * sizeof(T);
01237     }
01238 
01239     bool write(ostream& o) const {
01240       index.write(o);
01241       o.write((char*) data, index.back() * sizeof(T));
01242       return (bool) o;
01243     }
01244 
01245     const T* operator[](size_t i) const {
01246       return data + index[i];
01247     }
01248 
01249     pair<const T*, const T*> range(size_t i) const {
01250       return make_pair((*this)[i], (*this)[i + 1]);
01251     }
01252 
01253     vector<T> find(size_t i) const {
01254       return vector<T>((*this)[i], (*this)[i + 1]);
01255     }
01256 
01257     template <typename OutputIterator>
01258     OutputIterator find_and_write(size_t i, OutputIterator x) const {
01259       return copy((*this)[i], (*this)[i + 1] - 1, x);
01260     }
01261 
01262     size_type capacity() const {
01263       return eos - data;
01264     }
01265 
01266     template <typename RandomAccessIterator>
01267     void push_back(RandomAccessIterator start, RandomAccessIterator finish)
01268     {
01269       if (capacity() - index.back() < (size_type) (finish - start)) {
01270         if (data == NULL) {
01271           data = new T[finish - start];
01272           eos = data + (finish - start);
01273           copy(start, finish, data);
01274         }
01275         else {
01276           size_type new_size;
01277           for(new_size = 2 * capacity();
01278               new_size - index.back() < (size_type) (finish - start);
01279               new_size *= 2);
01280 
01281           T *tmp = new T[new_size];
01282           copy(start, finish, copy(data, data + index.back(), tmp));
01283           delete [] data;
01284           data = tmp;
01285           eos = data + new_size;
01286         }
01287       }
01288       else copy(start, finish, data + index.back());
01289       index.push_back(index.back() + (finish - start));
01290     }
01291 
01292     class const_iterator : public iterator<random_access_iterator_tag,
01293                                            pair<const int*, const int*> >
01294     {
01295     public:
01296       const_iterator(typename mapable_vector<typename
01297                      mapable_vector<T>::size_type>::const_iterator x,
01298                      const T *d)
01299         : i(x), data(d)
01300       { }
01301 
01302       pair<const int*, const int*> operator*() const {
01303         return make_pair(data + i[0], data + i[1]);
01304       }
01305 
01306       const_iterator& operator++() {
01307         ++i;
01308         return *this;
01309       }
01310 
01311       const_iterator operator++(int) {
01312         const_iterator tmp = *this;
01313         ++*this;
01314         return tmp;
01315       }
01316 
01317       bool operator==(const const_iterator &x) const {
01318         return i == x.i;
01319       }
01320 
01321     protected:
01322       typename mapable_vector<typename mapable_vector<T>::size_type>::const_iterator i;
01323       const T *data;
01324     };
01325 
01326     const_iterator begin() const {
01327       return const_iterator(index.begin(), data);
01328     }
01329 
01330     const_iterator end() const {
01331       return const_iterator(index.end() - 1, data);
01332     }
01333   };
01334 
01335   // Command line parser
01336   // User Options: -m {yes|no} /* state marks */
01337   //               -r {matrix|map|bin|mtf|tr}
01338   //               -v verbose mode
01339   // Parameters:   number of inputs
01340   //               allowed representations for automata
01341   struct config
01342   {
01343     bool state_mark;
01344     string representation;
01345     bool verbose_mode;
01346     string aux_file;
01347     ifstream aux_input;
01348     FILE *aux_handle;
01349     ofstream output;
01350     int argc;
01351     char** argv;
01352     int inputs;
01353     int arg_pos;
01354     vector<string> representations;
01355     list<string> options;
01356     string use, description;
01357 
01358     void usage()
01359     {
01360       cerr << "Usage: " << argv[0] << " [-v] [-m {yes|no}]";
01361       if (representations.size() > 1) {
01362         cerr << " [-r {";
01363         vector<string>::const_iterator i = representations.begin();
01364         while (1) {
01365           cerr << *i;
01366           if (++i != representations.end()) cerr << "|";
01367           else break;
01368         }
01369         cerr << "}]";
01370       }
01371       cerr << " " << use;
01372       if (!use.empty()) cerr << " ";
01373       if (inputs == 2) cerr << "file";
01374       cerr << endl;
01375       cerr << "  " << description << endl;
01376       cerr << "   -v\tverbose output to stderr" << endl;
01377       cerr << "   -m\tmark states during traversal (disabled by default)" << endl;
01378       if (representations.size() > 1)
01379         cerr << "   -r\tchoose DFA representation, default is "
01380              << representations.front() << endl;
01381       if (representation.empty() && !representations.empty())
01382         representation = representations.front();
01383       exit(1);
01384     }
01385 
01386     config(int argc_, char** argv_, int inputs_, const string representations_ = "",
01387            const string &use_ = "", const string &desc = "")
01388       : state_mark(false), verbose_mode(false), argc(argc_), argv(argv_),
01389         inputs(inputs_), use(use_), description(desc) {
01390 
01391       aux_handle = NULL;
01392       string tmpr = representations_;
01393       if (tmpr == "all") tmpr = "map matrix bin tr mtf";
01394       string::iterator i, j = tmpr.begin();
01395       for(;;) {
01396         i = j;
01397         j = find(j, tmpr.end(), ' ');
01398         representations.push_back(string(i, j));
01399         if (j == tmpr.end()) break;
01400         ++j;
01401       }
01402 
01403       copy(argv_ + 1, argv_ + argc, back_inserter(options));
01404       arg_pos = 1;
01405       for(list<string>::iterator x = options.begin(); x != options.end(); ++arg_pos)
01406         if (x->operator[](0) != '-') {
01407           if (inputs > 1 && aux_file.empty()) {
01408             aux_file = *x;
01409             aux_input.open(aux_file.c_str());
01410             if (!aux_input) {
01411               string msg = argv_[0];
01412               msg = msg + ": " + aux_file;
01413               perror(msg.c_str());
01414               exit(2);
01415             }
01416             aux_handle = fopen(aux_file.c_str(), "rb");
01417             if (aux_handle == NULL) {
01418               perror(aux_file.c_str());
01419               exit(2);
01420             }
01421             options.erase(x++);
01422           }
01423           else break;
01424         }
01425         else if (*x == "-h")
01426           usage();
01427         else if (*x == "-v") {
01428           verbose_mode = true;
01429           options.erase(x++);
01430         }
01431         else if (*x == "-m") {
01432           options.erase(x++);
01433           if (x == options.end()) usage();
01434           ++arg_pos;
01435           if (*x == "yes") state_mark = true;
01436           else
01437             if (*x == "no") state_mark = false;
01438             else usage();
01439           options.erase(x++);
01440         }
01441         else if (*x == "-r") {
01442           if (representations.size() < 2) usage();
01443           options.erase(x++);
01444           if (x == options.end()) usage();
01445           ++arg_pos;
01446           if (find(representations.begin(), representations.end(), *x)
01447               != representations.end())
01448             representation = *x;
01449           else
01450             usage();
01451           options.erase(x++);
01452         }
01453         else ++x;
01454 
01455       if (representation.empty() && !representations.empty())
01456         representation = representations.front();
01457 
01458       if (inputs > 1 && aux_file.empty()) usage();
01459 
01460       if (verbose_mode) {
01461         char buffer[1024];
01462         FILE *f = POPEN("date", "r");
01463         buffer[max__((int) fread(buffer, sizeof(char), 1023, f) - 1, 0)] = '\0';
01464         PCLOSE(f);
01465         cerr << buffer << " \"";
01466         copy(argv, argv + argc, ostream_iterator<char*>(cerr, " "));
01467         cerr << "\"" << endl;
01468       }
01469     }
01470 
01471     ~config()
01472     {
01473       if (verbose_mode)
01474         cerr << argv[0] << ": " << clock() / (double) CLOCKS_PER_SEC
01475              << " s CPU time" << endl;
01476 
01477       if (!aux_file.empty())
01478         aux_input.close();
01479       if (aux_handle != NULL)
01480         fclose(aux_handle);
01481     }
01482 
01483     typedef list<string>::const_iterator const_iterator;
01484     const_iterator begin() const {
01485       return options.begin();
01486     }
01487     const_iterator end() const {
01488       return options.end();
01489     }
01490   };
01491 
01492   template <typename T1, typename T2 = T1, typename T3 = T2>
01493   struct triple : public std::pair<T1, T2>
01494   {
01495     T3 third;
01496 
01497     triple() : pair<T1, T2>(), third() { }
01498     triple(const T1 &t1, const T2 &t2, const T3 &t3)
01499       : std::pair<T1, T2>(t1, t2), third(t3) { }
01500   };
01501 
01502   template <typename T1, typename T2, typename T3>
01503   inline
01504   ostream& operator<<(ostream &out, const triple<T1, T2, T3> &x) {
01505     return out << '<' << x.first << ", " << x.second << ", "
01506                << x.third << '>';
01507   }
01508 
01509   template <class T1, class T2, class T3>
01510   inline bool operator<(const triple<T1, T2, T3> &x,
01511                         const triple<T1, T2, T3> &y) {
01512     if (x.first < y.first) return true;
01513     if (y.first < x.first) return false;
01514     if (x.second < y.second) return true;
01515     if (y.second < x.second) return false;
01516     return x.third < y.third;
01517   }
01518 
01519   template <class T1, class T2, class T3>
01520   inline bool operator==(const triple<T1, T2, T3> &x,
01521                          const triple<T1, T2, T3> &y) {
01522     return (x.first == y.first
01523             && x.second == y.second && x.third == y.third);
01524   }
01525 
01526   template <class T1, class T2, class T3>
01527   inline triple<T1, T2, T3>
01528   make_triple(const T1 &x, const T2 &y,const T3 &z) {
01529     return triple<T1, T2, T3>(x, y, z);
01530   }
01531 
01532   template <class T1, class T2, class T3>
01533   inline triple<T1, T2, T3> make_triple(const T1 &x,
01534                                         const pair<T2, T3> &y) {
01535     return triple<T1, T2, T3>(x, y.first, y.second);
01536   }
01537 
01538   // File iterator classes
01539   //  Descrition: - input and random access iterators on a file accessed
01540   //                through the system call read. It is implemented with a reference
01541   //                counting buffer allowing efficient by-value calls to function
01542   //              - Define a specialized version of the standard STL copy algorithm
01543   //
01544   //  Template Parameters: 1. The buffer size in bytes (default 8192)
01545   //                       2. A unary object function from char to any type used as
01546   //                          output filter (default identity<char>)
01547   //  Constructor args:    1. A file descriptor. The default constructor is used to
01548   //                          build a past-the-end iterator
01549   //  Remarks: No checks are made on the value returned by read. If an error occurs,
01550   //  i.e. read returns -1, the behavior is undefined. Also, all operations regarding
01551   //  file opening and closing are left up to the user.
01552   //
01553 
01554   // This one should be used to read "once and only once" streams (i.e. pipes), it is
01555   // an input iterator. It is then forbidden to iterate through a sequence with two
01556   // iterators simultaneously. The reference counting functionnality is only provided
01557   // for the copy constructor to be efficient.
01558 
01559   template <int BUFFER_SIZE = 8192>
01560   class finput_iterator : public iterator<input_iterator_tag, char>
01561   {
01562   public:
01563     typedef char value_type;
01564     typedef finput_iterator<BUFFER_SIZE> self;
01565 
01566 
01567     finput_iterator(FILE* handle)
01568       : b(new shared_buffer(handle)), c(b->buffer) {
01569       eob = c + fread(c, sizeof(char), BUFFER_SIZE, b->file);
01570     }
01571 
01572     finput_iterator()
01573       : b(NULL), eob(NULL)
01574     { }
01575 
01576     ~finput_iterator() {
01577       if (b && --b->references == 0)
01578         delete b;
01579     }
01580 
01581     finput_iterator(const self &x)
01582       : b(x.b), c(x.c), eob(x.eob) {
01583       if (b) ++b->references;
01584     }
01585 
01586     self& operator++ () {
01587       if (++c == eob) {
01588         c = b->buffer;
01589         eob = c + fread(c, sizeof(char), BUFFER_SIZE, b->file);
01590         if (c == eob) eob = NULL;  // end of file
01591       }
01592       return *this;
01593     }
01594 
01595     self operator++ (int) {
01596       self tmp = *this;
01597       ++(*this);
01598       return tmp;
01599     }
01600 
01601     value_type operator* () const {
01602       return *c;
01603     }
01604 
01605     bool operator==(const self &x) const {
01606       return eob == x.eob;   // only for end of range equality testing
01607     }
01608 
01609     bool operator!=(const self &x) const {
01610       return !(*this == x);
01611     }
01612 
01613   protected:
01614     struct shared_buffer
01615     {
01616       FILE* file;
01617       unsigned int references;
01618       char buffer[BUFFER_SIZE];
01619 
01620       shared_buffer(FILE *handle)
01621         : file(handle), references(1)
01622       { }
01623     } *b;
01624 
01625     char *c, *eob;  // current char and end-of-buffer
01626   };
01627 
01628   template <int BUFFER_SIZE = 8192>
01629   class frandom_iterator : public iterator<random_access_iterator_tag, char>
01630   {
01631   protected:
01632     struct shared_buffer
01633     {
01634       FILE* file;
01635       unsigned int references;
01636       char buffer[BUFFER_SIZE];
01637 
01638       shared_buffer(FILE *handle)
01639         : file(handle), references(1)
01640       { }
01641     } *buff;
01642 
01643     long _offset;
01644     char *c, *eob;  // current char and end-of-buffer
01645 
01646   public:
01647     typedef frandom_iterator<BUFFER_SIZE> self;
01648     typedef char value_type;
01649 
01650     frandom_iterator()
01651       : buff(NULL), c(NULL)
01652     { }
01653 
01654     frandom_iterator(FILE *handle)
01655       : buff(new shared_buffer(handle)),
01656         _offset(ftell(handle)), c(buff->buffer) {
01657       eob = c + fread(c, sizeof(char), BUFFER_SIZE, buff->file);
01658       if (c == eob) c = NULL;
01659     }
01660 
01661     int offset() const {
01662       return _offset;
01663     }
01664 
01665     ~frandom_iterator() {
01666       if (buff != NULL && --buff->references == 0)
01667         delete buff;
01668     }
01669 
01670     frandom_iterator(const self &x)
01671       : buff(x.buff), _offset(x._offset),  c(x.c), eob(x.eob) {
01672       if (buff != NULL) ++buff->references;
01673     }
01674 
01675     self& operator++ () {
01676       ++_offset;
01677       if (++c == eob) {
01678         if (buff->references > 1) {
01679           --buff->references;
01680           buff = new shared_buffer(buff->file);
01681         }
01682         fseek(buff->file, _offset, SEEK_SET);
01683         eob = buff->buffer + fread(buff->buffer, sizeof(char), BUFFER_SIZE, buff->file);
01684         if (eob == buff->buffer) c = NULL;      // EOF
01685         else c = buff->buffer;
01686 
01687       }
01688       return *this;
01689     }
01690 
01691     self operator++ (int) {
01692       self tmp = *this;
01693       ++(*this);
01694       return tmp;
01695     }
01696 
01697     self& operator=(const self &x) {
01698       if (this != &x) {
01699         if (buff != NULL && --buff->references == 0) delete buff;
01700         buff = x.buff; c = x.c; _offset = x._offset; eob = x.eob;
01701         if (buff != NULL) ++buff->references;
01702       }
01703       return *this;
01704     }
01705 
01706     value_type operator* () const {
01707       return *c;
01708     }
01709 
01710     bool operator==(const self &x) const {
01711       if (c == NULL) return x.c == NULL;
01712       return (x.c != NULL) && (_offset == x._offset);
01713     }
01714 
01715     bool operator!=(const self &x) const {
01716       return !(*this == x);
01717     }
01718 
01719     self operator+(int x) const {
01720       if (c + x >= eob || c + x < buff->buffer) {
01721         fseek(buff->file, _offset + x, SEEK_SET);
01722         return frandom_iterator(buff->file);
01723       }
01724       frandom_iterator r(*this);
01725       r.c += x; r._offset += x;
01726       return r;
01727     }
01728 
01729     self& operator+=(int x) {
01730       c += x;
01731       _offset += x;
01732       if (c >= eob || c < buff->buffer) {
01733         fseek(buff->file, _offset, SEEK_SET);
01734         c = buff->buffer;
01735         eob = c + fread(c, sizeof(char), BUFFER_SIZE, buff->file);
01736       }
01737       return *this;
01738     }
01739 
01740     self operator-(int x) const {
01741       if (c == NULL || c - x < buff->buffer || c - x >= eob) {
01742         fseek(buff->file, _offset - x, SEEK_SET);
01743         return frandom_iterator(buff->file);
01744       }
01745       frandom_iterator r(*this);
01746       r.c -= x; r._offset -= x;
01747       return r;
01748     }
01749 
01750     int operator-(const self &x) const {
01751       return _offset - x._offset;
01752     }
01753 
01754     self& operator-=(int x) {
01755       _offset -= x;
01756       if (c == NULL || c - x < buff->buffer || c - x >= eob) {
01757         fseek(buff->file, _offset, SEEK_SET);
01758         c = buff->buffer;
01759         eob = c + fread(c, sizeof(char), BUFFER_SIZE, buff->file);
01760       }
01761       else c -= x;
01762       return *this;
01763     }
01764 
01765     self& operator--() {
01766       return *this -= 1;
01767     }
01768 
01769     self operator--(int) {
01770       self tmp = *this;
01771       *this -= 1;
01772       return tmp;
01773     }
01774 
01775     bool operator<(const self &x) const {
01776       return _offset < x._offset;
01777     }
01778 
01779     value_type operator[](int x) const {
01780       return *(*this + x);
01781     }
01782   };
01783 
01784   // Equivalent to UNIX command "tr" (RTFM)
01785   template <typename InputIterator>
01786   class translate
01787     : public iterator<input_iterator_tag, char>
01788   {
01789   protected:
01790 #ifndef _MSC_VER
01791     InputIterator i;
01792 #endif
01793     vector<char> tr;
01794 
01795   public:
01796 #ifdef _MSC_VER
01797     InputIterator i;
01798 #endif
01799     typedef translate self;
01800 
01801     translate()
01802     { }
01803 
01804     translate(const InputIterator &x)
01805       : i(x)
01806     { }
01807 
01808     translate(const InputIterator &x, const string& src, const string& trg)
01809       : i(x), tr(256)
01810     {
01811       mapping(src, trg);
01812     }
01813 
01814     void mapping(const string& src, const string& trg) {
01815       tr.resize(256);
01816       for(vector<char>::size_type j = 0; j < 256; ++j)  // identity
01817         tr[j] = (char) j;
01818       for(string::const_iterator jj = src.begin(), k = trg.begin();
01819           jj != src.end(); ++jj, ++k)
01820         tr[(unsigned char) *jj] = *k;
01821     }
01822 
01823     value_type operator*() const {
01824       return tr[(unsigned char) *i];
01825     }
01826 
01827     self& operator=(const InputIterator &x) {
01828       i = x;
01829       return *this;
01830     }
01831 
01832     self& operator++() {
01833       ++i;
01834       return *this;
01835     }
01836 
01837     self operator++(int) {
01838       self tmp = *this;
01839       ++(*this);
01840       return tmp;
01841     }
01842 
01843 #ifndef _MSC_VER
01844     friend bool operator==<>(const self&, const self&);
01845 #endif
01846   };
01847 
01848   template <typename InputIterator>
01849   inline
01850   bool operator==(const translate<InputIterator> &x,
01851                   const translate<InputIterator> &y) {
01852     return x.i == y.i;
01853   }
01854 
01855   template <typename InputIterator>
01856   inline
01857   bool operator!=(const translate<InputIterator> &x,
01858                   const translate<InputIterator> &y) {
01859     return !(x == y);
01860   }
01861 
01862   // Equivalent to UNIX command "tr -s" (RTFM)
01863   template <typename InputIterator>
01864   class squeeze
01865     : public iterator<input_iterator_tag, char>
01866   {
01867   protected:
01868 #ifndef _MSC_VER
01869     InputIterator first, last;
01870 #endif
01871     vector<char>  squeeze_me;
01872     char          c;
01873 
01874   public:
01875 #ifdef _MSC_VER
01876     InputIterator first, last;
01877 #endif
01878     typedef squeeze self;
01879 
01880     squeeze(const InputIterator &eos = InputIterator())
01881       : first(eos)
01882     { }
01883 
01884     squeeze(const string& chars, const InputIterator &start,
01885             const InputIterator &finish = InputIterator())
01886       : first(start), last(finish), squeeze_me(256, (char) 0)
01887     {
01888       for(string::const_iterator j = chars.begin();
01889           j != chars.end(); ++j)
01890         squeeze_me[(unsigned char) *j] = (char) 1;
01891       if (!(first == last)) c = *first;
01892     }
01893 
01894     value_type operator*() const {
01895       return c;
01896     }
01897 
01898     self& operator++() {
01899       if (squeeze_me[(unsigned char) c] == (char) 0) {
01900         ++first;
01901         c = *first;
01902       }
01903       else {
01904         char d;
01905         while(!(++first == last)) {
01906           d = *first;
01907           if (d != c) {
01908             c = d;
01909             break;
01910           }
01911         }
01912       }
01913       return *this;
01914     }
01915 
01916     self operator++(int) {
01917       self tmp = *this;
01918       ++(*this);
01919       return tmp;
01920     }
01921 
01922 #ifndef _MSC_VER
01923     friend bool operator==<>(const self&, const self&);
01924 #endif
01925   };
01926 
01927   template <typename InputIterator>
01928   inline
01929   bool operator==(const squeeze<InputIterator> &x,
01930                   const squeeze<InputIterator> &y) {
01931     return x.first == y.first;
01932   }
01933 
01934   template <typename InputIterator>
01935   inline
01936   bool operator!=(const squeeze<InputIterator> &x,
01937                   const squeeze<InputIterator> &y) {
01938     return !(x == y);
01939   }
01940 
01941   // Equivalent to UNIX command "tr -d" (RTFM)
01942   template <typename InputIterator>
01943   class ignore
01944     : public iterator<input_iterator_tag, char>
01945   {
01946   protected:
01947 #ifndef _MSC_VER
01948     InputIterator first, last;
01949 #endif
01950     vector<char>  ignore_me;
01951     char          c;
01952 
01953   public:
01954 #ifdef _MSC_VER
01955     InputIterator first, last;
01956 #endif
01957     typedef ignore self;
01958 
01959     ignore(const InputIterator &eos = InputIterator())  // end of sequence
01960       : first(eos)
01961     { }
01962 
01963     ignore(const string& chars, const InputIterator &start,
01964            const InputIterator &finish = InputIterator())
01965       : first(start), last(finish), ignore_me(256, (char) 0)
01966     {
01967       for(string::const_iterator j = chars.begin();
01968           j != chars.end(); ++j)
01969         ignore_me[(unsigned char) *j] = (char) 1;
01970       if (!(first == last)) {
01971         c = *first;
01972         if (ignore_me[(unsigned char) c] == (char) 1)
01973           ++(*this);
01974       }
01975     }
01976 
01977     value_type operator*() const {
01978       return c;
01979     }
01980 
01981     self& operator++() {
01982       while(!(++first == last)) {
01983         c = *first;
01984         if (ignore_me[(unsigned char) c] == (char) 0)
01985           break;
01986       }
01987       return *this;
01988     }
01989 
01990     self operator++(int) {
01991       self tmp = *this;
01992       ++(*this);
01993       return tmp;
01994     }
01995 
01996 #ifndef _MSC_VER
01997     friend bool operator==<>(const self&, const self&);
01998 #endif
01999   };
02000 
02001   template <typename InputIterator>
02002   inline
02003   bool operator==(const ignore<InputIterator> &x,
02004                   const ignore<InputIterator> &y) {
02005     return x.first == y.first;
02006   }
02007 
02008   template <typename InputIterator>
02009   inline
02010   bool operator!=(const ignore<InputIterator> &x,
02011                   const ignore<InputIterator> &y) {
02012     return !(x == y);
02013   }
02014 
02015   //namespace std {
02016   template <typename T, typename U>
02017   inline
02018   ostream& operator<<(ostream &out, const pair<T, U> &p) {
02019     out << "<" << p.first << ", " << p.second << ">";
02020     return out;
02021   }
02022   //};
02023 
02024   template <class T, class Allocator>
02025   inline
02026   ostream& operator<<(ostream &out, const vector<T, Allocator> &v)
02027   {
02028     out << "(";
02029     if (v.empty()) return out << " )";
02030     typename vector<T, Allocator>::const_iterator i = v.begin();
02031     while (1) {
02032       out << *i;
02033       if (++i != v.end()) out << ",";
02034       else return out << ")";
02035     }
02036   }
02037 
02038   template <class T>
02039   inline
02040   ostream& operator<<(ostream &out, const set<T> &v)
02041   {
02042     out << "(";
02043     if (v.empty()) return out << " )";
02044     typename set<T>::const_iterator i = v.begin();
02045     while (1) {
02046       out << *i;
02047       if (++i != v.end()) out << ",";
02048       else return out << ")";
02049     }
02050   }
02051 
02052 #ifdef _MSC_VER
02053   // Some missing POSIX functions or STL extensions for VC++
02054   static const char *optarg;
02055   static int optind;
02056 
02057   static
02058   int getopt(int argc, char* const* argv, const char *opt)
02059   {
02060     static char* const* prev_argv = NULL;
02061 
02062     optarg = NULL;
02063     int r = -1;
02064 
02065     if (argv != prev_argv) {
02066       prev_argv = argv;
02067       optind = 1;
02068     }
02069 
02070     if (optind < argc) {
02071       if (argv[optind][0] == '-') {
02072         const char* p = find(opt, opt + strlen(opt), argv[optind][1]);
02073         if (*p == '\0') return '?';
02074         if (strlen(argv[optind]) == 2) {
02075           r = *p;
02076           ++optind;
02077           if (p[1] == ':')
02078             if (optind == argc || argv[optind][0] == '-')
02079               return ':'; // missing argument to option
02080             else optarg = argv[optind++];
02081         }
02082       }
02083     }
02084     return r;
02085   }
02086 
02087 #endif
02088 
02089   template <typename F, typename G>
02090   class unary_compose
02091     : public unary_function<typename G::argument_type,
02092                             typename F::result_type>
02093   {
02094   protected:
02095     F f;
02096     G g;
02097   public:
02098     unary_compose(const F &x, const G &y)
02099       : f(x), g(y) {}
02100     typename F::result_type
02101     operator()(const typename G::argument_type &x) const {
02102       return f(g(x));
02103     }
02104   };
02105 
02106   template <typename F, typename G>
02107   inline
02108   unary_compose<F, G>
02109   compose1(const F &x, const G &y) {
02110     return unary_compose<F, G>(x, y);
02111   }
02112 
02113   template <class Pair>
02114   struct select1st
02115     : public unary_function<Pair, typename Pair::first_type>
02116   {
02117     const typename Pair::first_type&
02118     operator()(const Pair& x) const { return x.first;  }
02119   };
02120 
02121   template <class Pair>
02122   struct select2nd
02123     : public unary_function<Pair, typename Pair::second_type>
02124   {
02125     const typename Pair::second_type&
02126     operator()(const Pair& x) const { return x.second;  }
02127   };
02128 
02129   template <typename T>
02130   struct identity : public unary_function<T, T>
02131   {
02132     const T& operator()(const T &x) const {
02133       return x;
02134     }
02135   };
02136 
02137   template <typename InputIterator1, typename InputIterator2>
02138   inline
02139   int lexicographical_compare_3way(InputIterator1 x1, InputIterator1 y1,
02140                                    InputIterator2 x2, InputIterator2 y2)
02141   {
02142     for(; x1 != y1 && x2 != y2; ++x1, ++x2) {
02143       if (*x1 < *x2) return -1;
02144       if (*x2 < *x1) return 1;
02145     }
02146     if (x2 == y2) return !(x1 == y1);
02147     return -1;
02148   }
02149 
02150   template <typename T, typename U>
02151   inline
02152   bool operator==(const pair<const T, U> &x, const pair<T, U> &y) {
02153     return x.first == y.first && x.second == y.second;
02154   }
02155 
02156 #ifndef _MSC_VER
02157   template <typename T, typename U>
02158   inline
02159   bool operator==(const pair<T, U> &x, const pair<const T, U> &y) {
02160     return x.first == y.first && x.second == y.second;
02161   }
02162 #endif
02163 
02164   template <size_t n>
02165   inline
02166   bool operator<(const bitset<n> &x, const bitset<n> &y)
02167   {
02168     return lexicographical_compare(x.begin(), x.end(),
02169                                    y.begin(), y.end());
02170   }
02171 
02172   // skip_blanks_iterator implements DFA_*::const_iterator
02173   // it skips holes in the pointers vector to internal states
02174   // left by del_state
02175   template <typename T>
02176   class skip_blanks_iterator
02177     : public iterator<forward_iterator_tag, typename vector<T*>::size_type>
02178   {
02179     typedef skip_blanks_iterator self;
02180 
02181   private:
02182     const vector<T*> *Q;
02183     typename vector<T*>::size_type pos;
02184 
02185   public:
02186     skip_blanks_iterator(const vector<T*> *_Q,
02187                          typename vector<T*>::size_type _pos)
02188       : Q(_Q), pos(_pos)
02189     { }
02190 
02191     skip_blanks_iterator() : Q(NULL), pos(0) { }
02192     skip_blanks_iterator(const self &x) : Q(x.Q), pos(x.pos) { }
02193     self& operator=(const self &x)
02194     {
02195       Q = x.Q;
02196       pos = x.pos;
02197       return *this;
02198     }
02199 
02200     bool operator==(const self &x) const { return x.pos == pos; }
02201     bool operator!=(const self &x) const { return x.pos != pos; }
02202 
02203     typename vector<T*>::size_type operator*() const {
02204       return pos;
02205     }
02206 
02207     self& operator++()  {
02208       for(++pos; pos < Q->size() && Q->operator[](pos) == NULL; ++pos);
02209       return *this;
02210     }
02211 
02212     self operator++(int) {
02213       self tmp = *this;
02214       ++(*this);
02215       return tmp;
02216     }
02217   };
02218 
02219 #ifndef _MSC_VER
02220   template <typename Key, typename Data,
02221             template <typename K, typename D> class Container = map>
02222   class cache : protected list<Data>
02223   {
02224   public:
02225     typedef Data data_type;
02226     typedef Key key_type;
02227     typedef pair<key_type, data_type> value_type;
02228     typedef list<data_type> super;
02229     typedef size_t size_type;
02230 
02231     cache(size_type capacity__)
02232       : capacity_(capacity__)
02233     { }
02234 
02235   protected:
02236     Container<Key, typename list<Data>::iterator> hasher;
02237     size_type capacity_;
02238   };
02239 #endif
02240 
02241 } // namespace astl
02242 
02243 #endif
02244 

Generated on Sun Mar 8 02:41:35 2009 for ASTL by  doxygen 1.5.7.1