00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef ASTL_CAPTURE_H
00023 #define ASTL_CAPTURE_H
00024
00025
00026
00027
00028
00029
00030 #include <astl.h>
00031 #include <lazy.h>
00032 #include <vector>
00033 #include <iostream>
00034 #include <set>
00035 #include <iterator>
00036 #include <cctype>
00037 #include <bitset>
00038 #include <cstring>
00039 #include <sstream>
00040
00041
00042
00043 #include <cassert>
00044
00045 #include <tr1/unordered_map>
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090 using namespace std;
00091
00092 namespace astl {
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105 class regexp_capture_cursor : public cursor_concept
00106 {
00107 public:
00108 typedef enum { STAR, CONCAT, OR, REPEAT, EPSILON,
00109 LETTER, OPEN, CLOSE, QUESTION, PLUS,
00110 ANY, OPENCAPTURE, CLOSECAPTURE, FINAL } token_type;
00111 protected:
00112 class node;
00113
00114 public:
00115 class capture
00116 {
00117 public:
00118 capture(const node *n)
00119 : n(n)
00120 { }
00121
00122 int id() const { return n->n; }
00123 token_type type() const { return n->type; }
00124
00125 bool operator<(const capture &c) const {
00126 return id() < c.id();
00127 }
00128
00129 protected:
00130 const node *n;
00131 };
00132
00133 class configuration;
00134 friend ostream& operator<<(ostream&, const configuration&);
00135
00136 class configuration : public map<capture, long>
00137 {
00138 public:
00139 bool operator<(const configuration &y) const {
00140
00141 cout << "comparing " << *this << " and " << y << " == ";
00142
00143 if (&y == this || empty())
00144 {
00145 cout << "false" << endl;
00146 return false;
00147 }
00148
00149 if (size() < y.size())
00150 {
00151 cout << "false" << endl;
00152 return false;
00153 }
00154 if (size() > y.size())
00155 {
00156 cout << "true" << endl;
00157 return true;
00158 }
00159 const_iterator i, j;
00160 for(i = begin(), j = y.begin();
00161 i != end() && i->first.id() == j->first.id() && i->first.type() == j->first.type() && i->second == j->second;
00162 ++i, ++j);
00163 if (i == end())
00164 {
00165 cout << "false" << endl;
00166 return false;
00167 }
00168 if (i->first.type() == CLOSECAPTURE) {
00169 if (j->first.type() == OPENCAPTURE)
00170 {
00171 cout << "true" << endl;
00172 return true;
00173 }
00174 }
00175 else
00176 {
00177 cout << "false" << endl;
00178 return false;
00179 }
00180 if (i->first.id() > j->first.id())
00181 {
00182 cout << "true" << endl;
00183 return true;
00184 }
00185 if (i->first.id() < j->first.id())
00186 {
00187 cout << "false" << endl;
00188 return false;
00189 }
00190 if (i->first.type() == OPENCAPTURE)
00191 {
00192 cout << (i->second < j->second ? "true" : "false") << endl;
00193 return i->second < j->second;
00194 }
00195 cout << (j->second < i->second ? "true" : "false") << endl;
00196 return j->second < i->second;
00197 }
00198
00199 configuration intersection(const configuration &x) const {
00200 configuration r;
00201 configuration::const_iterator i = begin(), j = x.begin();
00202 while(i != end() && j != x.end()) {
00203 if (i->first < j->first) ++i;
00204 else if (j->first < i->first) ++j;
00205 else if (i->second == j->second ||
00206 (i->first.type() == OPENCAPTURE && i->second < j->second) ||
00207 (i->first.type() != OPENCAPTURE && j->second < i->second)) {
00208 r.insert(*i);
00209 ++i;
00210 ++j;
00211 }
00212 else {
00213 r.insert(*j);
00214 ++i;
00215 ++j;
00216 }
00217 }
00218 return r;
00219 }
00220 };
00221
00222 protected:
00223 typedef node* position;
00224 typedef map<position, configuration> S;
00225
00226 struct node {
00227 bitset<256> letter;
00228 token_type type;
00229 short n, m;
00230 bool nullable;
00231 S firstpos, lastpos, nextpos;
00232 configuration emptymatch;
00233 configuration conf;
00234
00235 node(token_type t, short n_ = 0, short m_ = 0)
00236 : type(t), n(n_), m(m_), nullable(false) {
00237 switch (type) {
00238 case FINAL :
00239 letter.set(0);
00240 break;
00241 case ANY :
00242 type = LETTER;
00243 letter.set();
00244 break;
00245 case OPENCAPTURE :
00246 case CLOSECAPTURE :
00247 nullable = true;
00248 break;
00249 default :;
00250 }
00251 }
00252
00253 node(char t)
00254 : type(LETTER), nullable(false) {
00255 letter.set((unsigned char) t);
00256 }
00257
00258 node(const bitset<256> &t)
00259 : letter(t), type(LETTER), nullable(false)
00260 { }
00261
00262 bool final() const {
00263 return type == FINAL;
00264 }
00265
00266 char to_char() const {
00267 size_t i;
00268 for(i = 0; i != letter.size() && letter.test(i) == false; ++i);
00269 return (char) (-128 + i);
00270 }
00271
00272 string pretty() const {
00273 ostringstream r;
00274 r << "[ " << *this;
00275 for(S::const_iterator i = nextpos.begin(); i != nextpos.end(); ++i) {
00276 r << ", {";
00277 for(configuration::const_iterator j = i->second.begin(); j != i->second.end(); ++j)
00278 r << ' ' << j->first.id();
00279 r << " }";
00280 }
00281 r << " ]";
00282 return r.str();
00283 }
00284 };
00285
00286 protected:
00287 S q;
00288 smart_ptr<vector<node> > e;
00289 const char *errmsg;
00290 smart_ptr<S> I;
00291 unsigned char capture_count_;
00292 configuration intersection;
00293
00294 public:
00295 smart_ptr<horspool_finder> horspool;
00296
00297 friend ostream& operator<<(ostream&, const regexp_capture_cursor&);
00298 protected:
00299
00300 node* exp(vector<node>::iterator &first, vector<node>::iterator last);
00301 node* form(vector<node>::iterator &first, vector<node>::iterator last);
00302 node* term(vector<node>::iterator &first, vector<node>::iterator last);
00303 node* exp2(vector<node>::iterator &first, vector<node>::iterator last, node *left);
00304 node* term2(vector<node>::iterator &first, vector<node>::iterator last, node *left);
00305 node* repeat(vector<node>::iterator &first, vector<node>::iterator last, node *left);
00306 void preprocess_repeats();
00307 void build_char_sets(map<string, bitset<256> >&);
00308 template <typename OutputIterator>
00309 int tokenize(const char *first, const char *last, OutputIterator x);
00310
00311 public:
00312 friend ostream& operator<<(ostream &, const node&);
00313 ptrdiff_t errpos;
00314
00315 typedef regexp_capture_cursor self;
00316 typedef S state_type;
00317 typedef set<node*> ordered_state_type;
00318 typedef plain char_traits;
00319 typedef plain::char_type char_type;
00320 typedef configuration tag_type;
00321
00322 regexp_capture_cursor(const string &expression = "", bool horspool_optimized = false)
00323 : errmsg(NULL), capture_count_(0), errpos(-1)
00324 {
00325 if (!expression.empty()) {
00326 e->reserve(128);
00327 errpos = tokenize(expression.c_str(),
00328 expression.c_str() + expression.size(),
00329 back_inserter(*e));
00330 if (errpos == -1) {
00331 preprocess_repeats();
00332 vector<node>::iterator first = e->begin();
00333 node *root = exp(first, e->end());
00334
00335
00336 if (root != NULL) {
00337 q = root->firstpos;
00338 for(S::const_iterator i = q.begin(); i != q.end(); ++i)
00339
00340 if ((*i).first->letter.count() == 256 || (*i).first->letter.test(0) == false)
00341 I->insert(*i);
00342 }
00343 else errpos = expression.size() - errpos;
00344 }
00345 }
00346 }
00347
00348 ptrdiff_t error() const {
00349 return errpos;
00350 }
00351
00352 const char* error_message() const { return errmsg; }
00353
00354 self& operator=(const state_type &s) {
00355 q = s;
00356 return *this;
00357 }
00358
00359 const state_type& src() const { return q; }
00360
00361 bool exists(char a) const {
00362 for(S::const_iterator i = q.begin(); i != q.end(); ++i)
00363 if ((*i).first->letter.test((unsigned char) a) == true)
00364 return true;
00365 return false;
00366 }
00367
00368 int capture_count() const { return capture_count_; }
00369
00370 bool forward(char letter) {
00371 state_type p = *I;
00372 intersection.clear();
00373 cout << "forward(" << (int) (unsigned char) letter << ")" << endl;
00374 for(S::const_iterator i = q.begin(); i != q.end(); ++i) {
00375 cout << i->first << ':' << i->first->pretty() << ":=" << i->second << endl;
00376
00377 if (letter == '\0') {
00378 if (i->first->letter.test((unsigned char) '\0') == true) {
00379 if (i->second < intersection)
00380 intersection = i->second;
00381 }
00382 }
00383 else if (i->first->letter.test((unsigned char) letter) == true) {
00384 if (i == q.begin())
00385 intersection = i->second;
00386 else
00387 intersection = intersection.intersection(i->second);
00388 }
00389 }
00390 cout << "intersection " << intersection << endl;
00391 for(S::const_iterator i = q.begin(); i != q.end(); ++i) {
00392 if (i->first->letter.test((unsigned char) letter) == true) {
00393 state_type::const_iterator first = i->first->nextpos.begin();
00394 const state_type::const_iterator last = i->first->nextpos.end();
00395 for(; first != last; ++first) {
00396 configuration tmp;
00397 for(configuration::const_iterator cc = i->second.begin(); cc != i->second.end(); ++cc)
00398 if (intersection.find(cc->first) == intersection.end())
00399 tmp.insert(make_pair(cc->first, cc->second - 1));
00400 configuration::const_iterator
00401 cfirst = first->second.begin(),
00402 clast = first->second.end();
00403 for(; cfirst != clast; ++cfirst)
00404 tmp[cfirst->first] = 0;
00405 configuration &conf = p[first->first];
00406 if (tmp < conf) conf = tmp;
00407 }
00408 }
00409 }
00410 q.swap(p);
00411 return !q.empty();
00412 }
00413
00414 bool src_final() const {
00415 for(S::const_iterator i = q.begin(); i != q.end(); ++i)
00416 if (i->first->final()) return true;
00417 return false;
00418 }
00419
00420 bool sink() const { return q.empty(); }
00421
00422 const tag_type& src_tag() const { return intersection; }
00423
00424 void clear() {
00425 q.clear();
00426 e->clear();
00427 I->clear();
00428 }
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469 protected:
00470 ptrdiff_t token_error(ptrdiff_t pos, const char *message) {
00471 errmsg = message;
00472 errpos = pos;
00473 return pos;
00474 }
00475
00476 node* syntax_error(ptrdiff_t pos, const char *message = NULL) {
00477 errmsg = message;
00478 errpos = pos;
00479 return NULL;
00480 }
00481
00482 int next_char(bitset<256>& letter, const char *begin, const char*& first, const char *last,
00483 bool& literal, const map<string, bitset<256> >& char_sets, bool icase);
00484
00485 };
00486
00487 inline
00488 ostream& operator<<(ostream &out, const regexp_capture_cursor::configuration &c) {
00489 for(regexp_capture_cursor::configuration::const_iterator i = c.begin();
00490 i != c.end(); ++i)
00491 out << i->first.id() << ':' << i->second << ' ';
00492 return out;
00493 }
00494
00495 inline
00496 void regexp_capture_cursor::preprocess_repeats() {
00497 vector<node> &v = *e;
00498 for(vector<node>::size_type p = 0; p < v.size();) {
00499 if (v[p].type != REPEAT) ++p;
00500 else {
00501
00502 vector<node>::size_type first = p - 1, last = p;
00503 for(; first > 0; --first) {
00504 if (v[first].type == LETTER || v[first].type == CLOSE || v[first].type == ANY)
00505 break;
00506 }
00507 if (v[first].type == CLOSE) {
00508 int count = 1;
00509 while(count != 0 && --first > 0)
00510 if (v[first].type == CLOSE) ++count;
00511 else if (v[first].type == OPEN) --count;
00512 }
00513
00514
00515
00516
00517 short n = (v[p].n > 128 ? 128 : v[p].n),
00518 m = (v[p].m > 128 ? 128 : v[p].m);
00519 v.insert(v.begin() + first, node(OPEN));
00520 ++p;
00521 ++last;
00522 ++first;
00523
00524 if (n == 0)
00525 v[p] = node(QUESTION);
00526 else
00527 v.erase(v.begin() + p);
00528 vector<node> tmp;
00529 if (m == -1)
00530 tmp.reserve((last + 1 - first) * n + 1);
00531 else
00532 tmp.reserve((last + 1 - first) * m + m - n + 1);
00533
00534 while(n > 0 || m > 0) {
00535 --n;
00536 --m;
00537 if (n <= 0) {
00538 if (m < n) {
00539 tmp.push_back(node(PLUS));
00540 continue;
00541 }
00542 if (m == 0)
00543 continue;
00544 }
00545 tmp.push_back(node(CONCAT));
00546 for(vector<node>::size_type tmp_p = first; tmp_p < last; ++tmp_p)
00547 tmp.push_back(v[tmp_p]);
00548 if (n <= 0)
00549 tmp.push_back(node(QUESTION));
00550 }
00551 tmp.push_back(node(CLOSE));
00552 v.insert(v.begin() + p, tmp.begin(), tmp.end());
00553 p += tmp.size();
00554 }
00555 }
00556 }
00557
00558 inline
00559 ostream& operator<<(ostream &out, const regexp_capture_cursor::node &x)
00560 {
00561 switch(x.type) {
00562 case regexp_capture_cursor::LETTER :
00563 if (x.letter.count() == 256)
00564 out << ".";
00565 else {
00566 if (x.letter.count() > 1)
00567 out << '[';
00568 int lower = 0;
00569
00570 for(int i = 32; i < 257; ++i) {
00571 if (i < 256 && x.letter.test(i) == true) {
00572 if (lower == 0)
00573 lower = i;
00574 }
00575 else {
00576 if (lower > 0) {
00577 if (i - lower == 1) {
00578 out << (unsigned char) lower;
00579 }
00580 else if (i - lower == 2) {
00581 out << (unsigned char) lower << (unsigned char) (lower + 1);
00582 }
00583 else out << (unsigned char) lower << '-'
00584 << (unsigned char) (i - 1);
00585 lower = 0;
00586 }
00587 }
00588 }
00589 if (x.letter.count() > 1)
00590 out << ']';
00591 }
00592 break;
00593
00594 case regexp_capture_cursor::OR :
00595 out << "|";
00596 break;
00597
00598 case regexp_capture_cursor::FINAL :
00599 out << "[EoE]";
00600 break;
00601
00602 case regexp_capture_cursor::OPEN :
00603 out << "(";
00604 break;
00605
00606 case regexp_capture_cursor::CLOSE :
00607 out << ")";
00608 break;
00609
00610 case regexp_capture_cursor::CONCAT :
00611 out << ".";
00612 break;
00613
00614 case regexp_capture_cursor::STAR :
00615 out << "*";
00616 break;
00617
00618 case regexp_capture_cursor::QUESTION :
00619 out << "?";
00620 break;
00621
00622 case regexp_capture_cursor::PLUS :
00623 out << "+";
00624 break;
00625
00626 case regexp_capture_cursor::REPEAT :
00627 out << "{" << x.n << "," << x.m << "}";
00628 break;
00629
00630 case regexp_capture_cursor::OPENCAPTURE :
00631 out << "_(" << x.n << "_";
00632 break;
00633
00634 case regexp_capture_cursor::CLOSECAPTURE :
00635 out << "_)" << x.n << "_";
00636 break;
00637
00638 default :
00639 break;
00640 }
00641 return out;
00642 }
00643
00644 inline
00645 void regexp_capture_cursor::build_char_sets(map<string, bitset<256> >& char_sets) {
00646 bitset<256> alpha, alnum, ascii, blank, cntrl, digit, word;
00647 bitset<256> graph, lower, print, punct, space, upper, xdigit;
00648 for(int c = 0; c < 256; ++c) {
00649 alnum.set(c, isalnum(c) != 0);
00650 alpha.set(c, isalpha(c) != 0);
00651 ascii.set(c, isascii(c) != 0);
00652 cntrl.set(c, iscntrl(c) != 0);
00653 digit.set(c, isdigit(c) != 0);
00654 graph.set(c, isgraph(c) != 0);
00655 lower.set(c, islower(c) != 0);
00656 print.set(c, isprint(c) != 0);
00657 punct.set(c, ispunct(c) != 0);
00658 space.set(c, isspace(c) != 0);
00659 upper.set(c, isupper(c) != 0);
00660 xdigit.set(c, isxdigit(c) != 0);
00661 }
00662 char_sets["[:alnum:]"] = alnum;
00663 char_sets["[:^alnum:]"] = ~ char_sets["[:alnum:]"];
00664 char_sets["[:alpha:]"] = alpha;
00665 char_sets["[:^alpha:]"] = ~ char_sets["[:alpha:]"];
00666 char_sets["[:ascii:]"] = ascii;
00667 char_sets["[:^ascii:]"] = ~ char_sets["[:ascii:]"];
00668 blank.set((unsigned char) ' ');
00669 blank.set((unsigned char) '\t');
00670 char_sets["[:blank:]"] = blank;
00671 char_sets["[:^blank:]"] = ~ char_sets["[:blank:"];
00672 char_sets["[:cntrl:]"] = cntrl;
00673 char_sets["[:^cntrl:]"] = ~ char_sets["[:cntrl:]"];
00674 char_sets["[:digit:]"] = digit;
00675 char_sets["[:^digit:]"] = ~ char_sets["[:digit:]"];
00676 char_sets["[:graph:]"] = graph;
00677 char_sets["[:^graph:]"] = ~ char_sets["[:graph:]"];
00678 char_sets["[:lower:]"] = lower;
00679 char_sets["[:^lower:]"] = ~ char_sets["[:lower:]"];
00680 char_sets["[:print:]"] = print;
00681 char_sets["[:^print:]"] = ~ char_sets["[:print:]"];
00682 char_sets["[:punct:]"] = punct;
00683 char_sets["[:^punct:]"] = ~ char_sets["[:punct:]"];
00684 char_sets["[:space:]"] = space;
00685 char_sets["[:^space:]"] = ~ char_sets["[:space:]"];
00686 char_sets["[:upper:]"] = upper;
00687 char_sets["[:^upper:]"] = ~ char_sets["[:upper:]"];
00688 char_sets["[:xdigit:]"] = xdigit;
00689 char_sets["[:^xdigit:]"] = ~ char_sets["[:xdigit:]"];
00690 alnum.set((unsigned char) '_');
00691 char_sets["[:word:]"] = alnum;
00692 char_sets["[:^word:]"] = ~ char_sets["[:word:]"];
00693 char_sets["\\d"] = digit;
00694 char_sets["\\D"] = ~ char_sets["\\d"];
00695 char_sets["\\s"] = space;
00696 char_sets["\\S"] = ~ char_sets["\\s"];
00697 char_sets["\\w"] = alnum;
00698 char_sets["\\W"] = ~alnum;
00699 char_sets["\\t"] = bitset<256>().set((unsigned char) '\t');
00700 char_sets["\\n"] = bitset<256>().set((unsigned char) '\n');
00701 char_sets["\\r"] = bitset<256>().set((unsigned char) '\r');
00702 char_sets["\\f"] = bitset<256>().set((unsigned char) '\f');
00703 char_sets["\\a"] = bitset<256>().set((unsigned char) '\a');
00704 char_sets["\\e"] = bitset<256>().set((unsigned char) '\x1B');
00705 char_sets["\\v"] = bitset<256>().set((unsigned char) '\v');
00706 char_sets["\\V"] = ~ char_sets["\\v"];
00707 char_sets["\\h"] = bitset<256>().set((unsigned char) ' ');
00708 char_sets["\\H"] = ~ char_sets["\\h"];
00709 }
00710
00711
00712
00713 inline
00714 int regexp_capture_cursor::next_char(bitset<256>& letter, const char *begin, const char*& first,
00715 const char *last, bool& literal,
00716 const map<string, bitset<256> >& char_sets, bool icase) {
00717 letter.reset();
00718
00719
00720 if ((*first == '$' || *first == '^') && !literal) {
00721 letter.set((unsigned char) 0);
00722 ++first;
00723 return -1;
00724 }
00725
00726 if (*first == '\\') {
00727 if (++first == last)
00728 return token_error(first - begin, "unterminated escaped sequence");
00729
00730
00731 if (*first == 'Q') {
00732 if (literal) {
00733 letter.set((unsigned char) '\\');
00734 return -1;
00735 }
00736 else {
00737 literal = true;
00738 if (++first == last)
00739 return token_error(first - begin, "unterminated quote sequence");
00740 return next_char(letter, begin, first, last, literal, char_sets, icase);
00741 }
00742 }
00743 if (*first == 'E') {
00744 if (!literal) {
00745 letter.set((unsigned char) 'E');
00746 if (icase)
00747 letter.set((unsigned char) 'e');
00748 }
00749 else literal = false;
00750 ++first;
00751 return -1;
00752 }
00753
00754 if (literal) {
00755 letter.set((unsigned char) '\\');
00756 return -1;
00757 }
00758
00759 if (*first == 'x') {
00760 if (++first == last)
00761 return token_error(first - begin, "\\x used with no following hex digits");
00762 int n = 0;
00763 if (*first >= '0' && *first <= '9') {
00764 n = 16 * (*first - '0');
00765 }
00766 else
00767 if (*first >= 'A' && *first <= 'F') {
00768 n = 16 * (10 + *first - 'A');
00769 }
00770 else
00771 if (*first >= 'a' && *first <= 'f') {
00772 n = 16 * (10 + *first - 'a');
00773 }
00774 else return token_error(first - begin, "\\x used with no following hex digits");
00775
00776 if (++first == last)
00777 return token_error(first - begin, "\\x used with only one following hex digit");
00778
00779 if (*first >= '0' && *first <= '9') {
00780 n += *first - '0';
00781 }
00782 else
00783 if (*first >= 'A' && *first <= 'F') {
00784 n += 10 + (*first - 'A');
00785 }
00786 else
00787 if (*first >= 'a' && *first <= 'f') {
00788 n += 10 + (*first - 'a');
00789 }
00790 else return token_error(first - begin, "\\x used with only one following hex digit");
00791 letter.set(n);
00792 ++first;
00793 return -1;
00794 }
00795
00796 if (*first == 'c') {
00797 if (++first == last || *first < 'A' || *first > 'Z')
00798 return token_error(first - begin, "\\c used with no following upper-case letter");
00799 letter.set(*first - 'A' + 1);
00800 ++first;
00801 return -1;
00802 }
00803
00804
00805 std::map<string, bitset<256> >::const_iterator c = char_sets.find(std::string("\\") + *first);
00806 if (c != char_sets.end()) {
00807 letter = c->second;
00808 ++first;
00809 return -1;
00810 }
00811
00812 if (*first == '.') {
00813 letter.set((unsigned char) '.');
00814 ++first;
00815 return -1;
00816 }
00817 }
00818
00819 if (*first == '.') {
00820 if (literal)
00821 letter.set((unsigned char) '.');
00822 else
00823 letter.set();
00824 }
00825 else
00826 if (icase && isalpha(*first)) {
00827 letter.set((unsigned char) tolower((unsigned char) *first));
00828 letter.set((unsigned char) toupper((unsigned char) *first));
00829 }
00830 else letter.set((unsigned char) *first);
00831 ++first;
00832 return -1;
00833 }
00834
00835
00836 template <typename OutputIterator>
00837 int regexp_capture_cursor::tokenize(const char *first, const char *last, OutputIterator x)
00838 {
00839 static map<string, bitset<256> > char_sets;
00840 if (char_sets.empty())
00841 build_char_sets(char_sets);
00842
00843 *x++ = node(OPEN);
00844 node previous = node(OPEN);
00845
00846 bool literal = false;
00847 std::vector<char> parenthesis;
00848 bool icase = false;
00849 std::vector<int> open_captures;
00850 int current_open_capture = 0;
00851
00852 for(const char *begin = first; first != last; ++first, ++x) {
00853 if (literal && *first != '\\') {
00854 if (previous.type != OPEN && previous.type != OR)
00855 *x++ = node(CONCAT);
00856 if (icase && isalpha(*first)) {
00857 bitset<256> c;
00858 c.set((unsigned char) tolower(*first));
00859 c.set((unsigned char) toupper(*first));
00860 *x = previous = node(c);
00861 }
00862 else
00863 *x = previous = node(*first);
00864 continue;
00865 }
00866
00867 switch (*first) {
00868 case '|' :
00869 if (first == begin)
00870 return token_error(0, "missing left part of alternative");
00871 *x = previous = node(OR);
00872 break;
00873
00874 case '(' :
00875 if (previous.type != OPEN && previous.type != OR)
00876 *x++ = node(CONCAT);
00877 if (first + 2 < last && first[1] == '?' && first[2] == ':') {
00878 *x = previous = node(OPEN);
00879 first += 2;
00880 parenthesis.push_back('(');
00881 }
00882 else {
00883 if (first + 3 < last && first[1] == '?' &&
00884 first[2] == 'i' && first[3] == ':') {
00885 first += 3;
00886 if (icase == false) {
00887 parenthesis.push_back('i');
00888 icase = true;
00889 }
00890 else parenthesis.push_back('(');
00891 }
00892 else {
00893 ++capture_count_;
00894 *x++ = node(OPEN);
00895 *x++ = node(OPENCAPTURE, current_open_capture);
00896 open_captures.push_back(current_open_capture);
00897 current_open_capture += 2;
00898 *x++ = node(CONCAT);
00899 parenthesis.push_back('c');
00900 }
00901 *x = previous = node(OPEN);
00902 }
00903 break;
00904
00905 case ')' :
00906 if (parenthesis.empty())
00907 return token_error(first - begin, "unbalanced parenthesis");
00908 if (parenthesis.back() == 'i')
00909 icase = false;
00910 else
00911 if (parenthesis.back() == 'c') {
00912 ++capture_count_;
00913 *x++ = node(CLOSE);
00914 *x++ = node(CONCAT);
00915 *x++ = node(CLOSECAPTURE, open_captures.back() + 1);
00916 open_captures.pop_back();
00917 }
00918 *x = previous = node(CLOSE);
00919 parenthesis.pop_back();
00920 break;
00921
00922 case '*' :
00923 if (first == begin)
00924 return token_error(0, "missing expression before operator *");
00925 *x = previous = node(STAR);
00926 break;
00927
00928 case '?' :
00929 if (first == begin)
00930 return token_error(0, "missing expression before operator ?");
00931 *x = previous = node(QUESTION);
00932 break;
00933
00934 case '+' :
00935 if (first == begin)
00936 return token_error(0, "missing expression before operator +");
00937 *x = previous = node(PLUS);
00938 break;
00939
00940 case '{' :
00941 if (first == begin)
00942 return token_error(0, "missing expression before repeat operator");
00943 if (++first == last)
00944 return token_error(first - begin, "unterminated expression");
00945 {
00946 int n = 0;
00947 const char *tmp = first;
00948 while (isdigit(*first)) {
00949 n = n * 10 + *first - '0';
00950 if (++first == last)
00951 return token_error(first - begin, "unterminated repeat operator");
00952 }
00953 int m = n;
00954 if (*first == ',') {
00955 if (++first == last)
00956 return token_error(first - begin, "unterminated repeat operator");
00957 tmp = first;
00958 for(m = 0; isdigit(*first);) {
00959 m = m * 10 + *first - '0';
00960 if (++first == last)
00961 return token_error(first - begin, "unterminated repeat operator");
00962 }
00963 if (tmp == first) m = -1;
00964 }
00965 if (*first != '}')
00966 return token_error(first - begin, "invalid repeat operator");
00967
00968
00969 if (n > 32768 || m > 65535 || (n > m && m > -1) || (n == m && n == 0))
00970 return token_error(first - begin, "wrong range for repeat operator");
00971
00972
00973 if (n == 0 && m == 1)
00974 *x = node(QUESTION);
00975 else if (n == 0 && m == -1)
00976 *x = node(STAR);
00977 else if (n == 1 && m == -1)
00978 *x = node(PLUS);
00979 else if (!(n == 1 && m == 1))
00980 *x = node(REPEAT, n, m);
00981 }
00982 break;
00983
00984 case '[' :
00985 {
00986 if (previous.type != OPEN && previous.type != OPENCAPTURE && previous.type != OR)
00987 *x++ = node(CONCAT);
00988 bitset<256> a;
00989 bool v = true;
00990 if (++first == last)
00991 return token_error(first - begin, "unterminated expression");
00992 if (*first == '^') {
00993 a.set();
00994 a.set(0, false);
00995 v = false;
00996 if (++first == last)
00997 return token_error(first - begin, "unterminated expression");
00998 }
00999 if (*first == '-') {
01000 a.set((unsigned char) '-', v);
01001 ++first;
01002 }
01003 else
01004 if (*first == ']') {
01005 a.set((unsigned char) ']', v);
01006 ++first;
01007 }
01008 for(; first != last && *first != ']'; ++first) {
01009 if (*first == '[') {
01010 if (++first == last) break;
01011 if (*first != ':') {
01012 a.set((unsigned char) '[', v);
01013 }
01014 else {
01015 const char *start = first - 1;
01016 first = std::find(++first, last, ']');
01017 if (first == last) break;
01018 std::map<string, bitset<256> >::const_iterator c = char_sets.find(std::string(start, first + 1));
01019 if (c == char_sets.end())
01020 return token_error(start - begin, "unknown character set");
01021 if (v == true)
01022 a |= c->second;
01023 else a &= ~c->second;
01024 continue;
01025 }
01026 }
01027
01028
01029
01030 bitset<256> tmp;
01031 if (*first != '-') {
01032 switch (*first) {
01033 case '^' :
01034 case '$' :
01035 tmp.set((unsigned char) *first++);
01036 break;
01037 default :
01038 {
01039 int r = next_char(tmp, begin, first, last, literal, char_sets, icase);
01040 if (r != -1)
01041 return r;
01042
01043 if (tmp.count() == 256) {
01044 tmp.reset();
01045 tmp.set((unsigned char) '.');
01046 }
01047 }
01048 }
01049 if (v == true)
01050 a |= tmp;
01051 else a &= ~ tmp;
01052 if (*first != '-') {
01053 --first;
01054 continue;
01055 }
01056 }
01057
01058 if (++first == last)
01059 return token_error(first - begin, "unterminated expression");
01060 if (*first == ']') {
01061 a.set((unsigned char) '-', v);
01062 break;
01063 }
01064 else {
01065 if (tmp.count() != 1) {
01066 return token_error(first - 2 - begin, "wrong character range: the lower bound cannot be a character set");
01067 }
01068 bitset<256> upper_bound;
01069 switch (*first) {
01070 case '^' :
01071 case '$' :
01072 upper_bound.set((unsigned char) *first);
01073 break;
01074 default :
01075 {
01076 int r = next_char(upper_bound, begin, first, last, literal, char_sets, icase);
01077 --first;
01078 if (r != -1)
01079 return token_error(first - begin, "wrong character range");
01080
01081 if (upper_bound.count() == 256) {
01082 upper_bound.reset();
01083 upper_bound.set((unsigned char) '.');
01084 }
01085 }
01086 }
01087 if (upper_bound.count() != 1)
01088 return token_error(first - begin, "wrong character range: the upper bound cannot be a character set");
01089
01090 unsigned int int_lower_bound = 0, int_upper_bound = 0;
01091 while (tmp.test(int_lower_bound) == false)
01092 ++int_lower_bound;
01093 while (upper_bound.test(int_upper_bound) == false)
01094 ++int_upper_bound;
01095
01096
01097 if (int_upper_bound < int_lower_bound)
01098 return token_error(first - begin, "wrong character range: lower bound cannot be greater than upper bound");
01099 else
01100 for(unsigned int i = int_lower_bound; i <= int_upper_bound; ++i)
01101 a.set(i, v);
01102 }
01103 }
01104
01105 if (first == last)
01106 return token_error(first - begin, "unterminated expression");
01107 *x = previous = node(a);
01108 break;
01109 }
01110
01111 default :
01112 bitset<256> next_letter;
01113 int r = next_char(next_letter, begin, first, last, literal, char_sets, icase);
01114 if (r != -1)
01115 return r;
01116 if (next_letter.any()) {
01117 if (previous.type != OPEN && previous.type != OR)
01118 *x++ = node(CONCAT);
01119 if (next_letter.count() == 256)
01120 #ifndef UTF8
01121 *x = previous = node(ANY);
01122 #else
01123 {
01124 *x++ = node(OPEN);
01125 next_letter.reset();
01126 for(int i = 1; i < 0x7F; ++i)
01127 next_letter.set(i);
01128 *x++ = node(next_letter);
01129 *x++ = node(OR);
01130 next_letter.reset();
01131 for(int i = 0; i < 32; ++i)
01132 next_letter.set(i | 192);
01133 *x++ = node(next_letter);
01134 *x++ = node(CONCAT);
01135 *x++ = node(ANY);
01136 *x = previous = node(CLOSE);
01137 }
01138 #endif
01139 else
01140 *x = previous = node(next_letter);
01141 }
01142 --first;
01143 break;
01144 }
01145 }
01146 *x++ = node(CLOSE);
01147 *x++ = node(CONCAT);
01148 *x++ = node(FINAL);
01149 *x++ = node(CONCAT);
01150 *x++ = node(FINAL);
01151 return -1;
01152 }
01153
01154
01155 inline
01156 regexp_capture_cursor::node*
01157 regexp_capture_cursor::exp(vector<regexp_capture_cursor::node>::iterator &first,
01158 vector<regexp_capture_cursor::node>::iterator last)
01159 {
01160 if (first == last)
01161 return syntax_error(last - first, "empty expression");
01162 node *t = term(first, last);
01163 assert(t != 0);
01164 if (t == 0) return 0;
01165 return exp2(first, last, t);
01166 }
01167
01168
01169 inline
01170 regexp_capture_cursor::node*
01171 regexp_capture_cursor::exp2(vector<regexp_capture_cursor::node>::iterator &first,
01172 vector<regexp_capture_cursor::node>::iterator last, node *left)
01173 {
01174 assert(left != 0);
01175 if (first != last && first->type == OR) {
01176 node &root = *first;
01177 node *right = term(++first, last);
01178 if (right == 0) return 0;
01179 root.nullable = right->nullable || left->nullable;
01180
01181 root.firstpos = left->firstpos;
01182 root.firstpos.insert(right->firstpos.begin(), right->firstpos.end());
01183 root.lastpos = left->lastpos;
01184 root.lastpos.insert(right->lastpos.begin(), right->lastpos.end());
01185 if (left->nullable)
01186 root.emptymatch = left->emptymatch;
01187 else
01188 root.emptymatch = right->emptymatch;
01189 return exp2(first, last, &root);
01190 }
01191 return left;
01192 }
01193
01194
01195 inline
01196 regexp_capture_cursor::node*
01197 regexp_capture_cursor::term(vector<regexp_capture_cursor::node>::iterator &first,
01198 vector<regexp_capture_cursor::node>::iterator last)
01199 {
01200 if (first == last)
01201 return syntax_error(last - first, "unterminated expression");
01202 node *f = form(first, last);
01203 assert(f != 0);
01204 if (f == 0) return 0;
01205 node *s = repeat(first, last, f);
01206 return term2(first, last, s);
01207 }
01208
01209
01210 inline
01211 regexp_capture_cursor::node*
01212 regexp_capture_cursor::repeat(vector<regexp_capture_cursor::node>::iterator &first,
01213 vector<regexp_capture_cursor::node>::iterator last, node *left)
01214 {
01215 assert(left != 0);
01216 if (first != last)
01217 switch (first->type) {
01218 case STAR :
01219 case PLUS :
01220 {
01221 node &root = *first;
01222 root.nullable = first->type == STAR;
01223 root.firstpos = left->firstpos;
01224 root.lastpos = left->lastpos;
01225 if (left->nullable)
01226 root.emptymatch = left->emptymatch;
01227
01228 for(S::const_iterator i = root.lastpos.begin(); i != root.lastpos.end(); ++i)
01229 for(S::const_iterator j = root.firstpos.begin(); j != root.firstpos.end(); ++j) {
01230 configuration tmp = i->second;
01231 tmp.insert(j->second.begin(), j->second.end());
01232 (*i).first->nextpos.insert(make_pair(j->first, tmp));
01233 }
01234 return repeat(++first, last, &root);
01235 }
01236 case QUESTION :
01237 {
01238 node &root = *first;
01239 root.nullable = true;
01240 root.firstpos = left->firstpos;
01241 root.lastpos = left->lastpos;
01242 if (left->nullable)
01243 root.emptymatch = left->emptymatch;
01244 return repeat(++first, last, &root);
01245 }
01246 default :
01247 break;
01248 }
01249 return left;
01250 }
01251
01252
01253
01254 inline
01255 regexp_capture_cursor::node*
01256 regexp_capture_cursor::term2(vector<regexp_capture_cursor::node>::iterator &first,
01257 vector<regexp_capture_cursor::node>::iterator last, node *left)
01258 {
01259 if (first != last && first->type == CONCAT) {
01260 node &root = *first;
01261 node *f = form(++first, last);
01262 assert(f != 0);
01263 if (f == 0) return 0;
01264 node *right = repeat(first, last, f);
01265 root.nullable = left->nullable && right->nullable;
01266
01267 root.firstpos = left->firstpos;
01268 if (left->nullable) {
01269 for(S::const_iterator s = right->firstpos.begin(); s != right->firstpos.end(); ++s) {
01270 configuration tmp = s->second;
01271 tmp.insert(left->emptymatch.begin(), left->emptymatch.end());
01272 root.firstpos.insert(make_pair(s->first, tmp));
01273 }
01274 }
01275
01276 root.lastpos = right->lastpos;
01277 if (right->nullable) {
01278 for(S::const_iterator s = left->lastpos.begin(); s != left->lastpos.end(); ++s) {
01279 configuration tmp = s->second;
01280 tmp.insert(right->emptymatch.begin(), right->emptymatch.end());
01281 root.lastpos.insert(make_pair(s->first, tmp));
01282 }
01283 }
01284
01285 root.emptymatch = left->emptymatch;
01286 root.emptymatch.insert(right->emptymatch.begin(), right->emptymatch.end());
01287
01288
01289 for(S::const_iterator i = left->lastpos.begin(); i != left->lastpos.end(); ++i)
01290 for(S::const_iterator j = right->firstpos.begin(); j != right->firstpos.end(); ++j) {
01291 configuration tmp = j->second;
01292 tmp.insert(i->second.begin(), i->second.end());
01293 i->first->nextpos.insert(make_pair(j->first, tmp));
01294 }
01295 return term2(first, last, &root);
01296 }
01297 return left;
01298 }
01299
01300
01301
01302 inline
01303 regexp_capture_cursor::node*
01304 regexp_capture_cursor::form(vector<regexp_capture_cursor::node>::iterator &first,
01305 vector<regexp_capture_cursor::node>::iterator last)
01306 {
01307 if (first == last)
01308 return syntax_error(last - first, "unterminated expression");
01309 if (first->type == OPEN) {
01310 node *e = exp(++first, last);
01311 assert(e != 0);
01312 if (e == 0) return 0;
01313 if (first->type == CLOSE) ++first;
01314 else return syntax_error(last - first, "unbalanced parenthesis");
01315 return e;
01316 }
01317 if (first->type == OPENCAPTURE || first->type == CLOSECAPTURE) {
01318 first->emptymatch[&*first] = 0;
01319 ++first;
01320 return &(first[-1]);
01321 }
01322 first->nullable = false;
01323 first->firstpos.insert(make_pair(&*first, configuration()));
01324 first->lastpos.insert(make_pair(&*first, configuration()));
01325 ++first;
01326 return &(first[-1]);
01327 }
01328
01329 inline
01330 ostream& operator<<(ostream& out, const regexp_capture_cursor& c) {
01331 for(vector<regexp_capture_cursor::node>::const_iterator n = c.e->begin(); n != c.e->end(); ++n) {
01332 if (c.q.find(const_cast<astl::regexp_capture_cursor::node*>(&*n)) != c.q.end())
01333 out << "\033[1m" << *n << "\033[0m";
01334 else
01335 out << *n;
01336 }
01337 return out;
01338 }
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350 template <typename Tag, typename SigmaOut>
01351 class DFT_matrix_mini : public DFA_matrix_mini<plain, Tag>
01352 {
01353 public:
01354 typedef DFA_matrix_mini<plain, Tag> super;
01355 typedef typename super::state_type state_type;
01356 typedef typename super::char_type char_type;
01357 typedef SigmaOut output_type;
01358
01359 state_type new_state() {
01360 state_type q = super::new_state();
01361
01362 output_slots.resize(q * 256 + 256, false);
01363 return q;
01364 }
01365
01366 output_type* output(state_type q, char_type c) {
01367 return output_slots[pack(q, c)] == true ? &outputs[pack(q, c)] : NULL;
01368 }
01369
01370 void set_output(state_type q, char_type c, const output_type &o) {
01371 unsigned int packed = pack(q, c);
01372 output_slots[packed] = true;
01373 outputs[packed] = o;
01374 }
01375
01376 protected:
01377 tr1::unordered_map<unsigned int, SigmaOut> outputs;
01378 vector<bool> output_slots;
01379
01380 unsigned int pack(state_type q, char_type c) const {
01381 return (((unsigned int) q) << 8) | (unsigned char) c;
01382 }
01383 };
01384
01385 template <>
01386 class lazy_cursor<regexp_capture_cursor, DFT_matrix_mini> : public cursor_concept
01387 {
01388 public:
01389 typedef regexp_capture_cursor Cursor;
01390 #ifdef USE_HASH_TABLE
01391 typedef DFT_matrix_mini<const regexp_capture_cursor::ordered_state_type*,
01392 regexp_capture_cursor::tag_type> DFA_type;
01393 #else
01394 typedef DFT_matrix_mini<const regexp_capture_cursor::state_type*,
01395 regexp_capture_cursor::tag_type> DFA_type;
01396 #endif
01397
01398 protected:
01399 #ifdef USE_HASH_TABLE
01400 typedef std::map<regexp_capture_cursor::ordered_state_type, DFA_type::state_type> mapper;
01401 #else
01402 typedef std::map<regexp_capture_cursor::state_type, DFA_type::state_type> mapper;
01403 #endif
01404
01405 typedef unsigned long offset_type;
01406
01407 Cursor c;
01408 smart_ptr<DFA_type> dfa;
01409 smart_ptr<mapper> mapping;
01410 DFA_type::state_type my_sink;
01411 DFA_type::state_type q;
01412
01413 public:
01414 typedef lazy_cursor self;
01415 typedef DFA_type::state_type state_type;
01416 typedef Cursor::char_type char_type;
01417 typedef Cursor::char_traits char_traits;
01418 typedef Cursor::configuration configuration;
01419 typedef configuration tag_type;
01420
01421 protected:
01422 const configuration *conf;
01423
01424 public:
01425 lazy_cursor(const Cursor &x)
01426 : c(x), my_sink(dfa->new_state()), q(DFA_type::null_state), conf(NULL)
01427 {
01428 if (!x.sink()) {
01429 q = dfa->new_state();
01430 #ifdef USE_HASH_TABLE
01431 dfa->tag(q) = &(mapping->insert(make_pair(regexp_capture_cursor::ordered_state_type(c.src().begin(), c.src().end()), q)).first->first);
01432 #else
01433 dfa->tag(q) = &(mapping->insert(make_pair(c.src(), q)).first->first);
01434 #endif
01435 dfa->final(q) = c.src_final();
01436 dfa->initial(q);
01437 }
01438 }
01439
01440 int capture_count() const { return c.capture_count(); }
01441
01442 const Cursor& adaptee() {
01443 if (q != DFA_type::null_state)
01444 c = regexp_capture_cursor::state_type(dfa->tag(q)->begin(), dfa->tag(q)->end());
01445 return c;
01446 }
01447
01448 const DFA_type& cache() const {
01449 return *dfa;
01450 }
01451
01452 state_type src() const {
01453 return q;
01454 }
01455
01456 self& operator=(state_type p) {
01457 q = p;
01458 return *this;
01459 }
01460
01461 bool src_final() const {
01462 return dfa->final(q);
01463 }
01464
01465
01466 const tag_type& src_tag() const {
01467 static const configuration empty;
01468 if (conf != NULL)
01469 return *conf;
01470 return empty;
01471 }
01472
01473 bool sink() const {
01474 return q == DFA_type::null_state;
01475 }
01476
01477 state_type sink_state() const {
01478 return DFA_type::null_state;
01479 }
01480
01481 bool exists(const char_type &a) const {
01482 state_type aim = dfa->delta1(q, a);
01483 if (aim == DFA_type::null_state) {
01484 Cursor tmp = c;
01485 #ifdef USE_HASH_TABLE
01486 tmp = regexp_capture_cursor::state_type(dfa->tag(q)->begin(), dfa->tag(q)->end());
01487 #else
01488 tmp = *dfa->tag(q);
01489 #endif
01490 return tmp.exists(a);
01491 }
01492 return aim != my_sink;
01493 }
01494
01495 bool forward(const char_type &a) {
01496 state_type tmp = dfa->delta1(q, a);
01497 if (tmp == my_sink) {
01498 q = DFA_type::null_state;
01499 return false;
01500 }
01501 if (tmp == DFA_type::null_state) {
01502 c = regexp_capture_cursor::state_type(dfa->tag(q)->begin(), dfa->tag(q)->end());
01503 if (c.forward(a)) {
01504 #ifdef USE_HASH_TABLE
01505 std::pair<mapper::iterator, bool> p =
01506 mapping->insert(std::make_pair(regexp_capture_cursor::ordered_state_type(c.src().begin(), c.src().end()), DFA_type::null_state));
01507 #else
01508 std::pair<mapper::iterator, bool> p =
01509 mapping->insert(std::make_pair(c.src(), DFA_type::null_state));
01510 #endif
01511 if (p.second == true) {
01512 tmp = dfa->new_state();
01513 dfa->final(tmp) = c.src_final();
01514 dfa->tag(tmp) = &(p.first->first);
01515 p.first->second = tmp;
01516 }
01517 else tmp = p.first->second;
01518 dfa->set_trans(q, a, tmp);
01519 if (!c.src_tag().empty()) {
01520
01521 dfa->set_output(q, a, c.src_tag());
01522 }
01523 }
01524 else {
01525 dfa->set_trans(q, a, my_sink);
01526 q = DFA_type::null_state;
01527 return false;
01528 }
01529 }
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540 conf = dfa->output(q, a);
01541
01542 q = tmp;
01543 return true;
01544 }
01545 };
01546
01547 #if 0
01548
01549
01550 inline
01551 bool match(lazy_cursor<regexp_capture_cursor>& c, const char *w)
01552 {
01553 if (!c.forward('\0'))
01554 return false;
01555 for(; *w != '\0' && c.forward(*w); ++w)
01556 if (c.src_final()) return true;
01557 return *w == '\0' && c.forward('\0') && c.src_final();
01558 }
01559
01560 template <typename InputI>
01561 inline
01562 bool match(lazy_cursor<regexp_capture_cursor>& c, InputI first, InputI last)
01563 {
01564 if (!c.forward('\0'))
01565 return false;
01566 for(; first != last && c.forward(*first); ++first)
01567 if (c.src_final()) return true;
01568 return first == last && c.forward('\0') && c.src_final();
01569 }
01570
01571 template <typename ForwardI>
01572 inline
01573 ForwardI first_match(lazy_cursor<regexp_capture_cursor> &c, ForwardI first, ForwardI last)
01574 {
01575 if (!c.forward('\0')) return first;
01576 const ForwardI start = first;
01577 for(; first != last && c.forward(*first); ++first)
01578 if (c.src_final()) return ++first;
01579 return first == last && c.forward('\0') && c.src_final() ? first : start;
01580 }
01581
01582 inline
01583 const char*
01584 first_match(lazy_cursor<regexp_capture_cursor> &c, const char *text)
01585 {
01586 if (!c.forward('\0')) return text;
01587 const char *start = text;
01588 for(; *text != '\0' && c.forward(*text); ++text)
01589 if (c.src_final()) return ++text;
01590 return *text == '\0' && c.forward('\0') && c.src_final() ? text : start;
01591 }
01592
01593 #endif
01594
01595 template <typename ForwardI>
01596 inline
01597 bool astl_capture(lazy_cursor<regexp_capture_cursor, DFT_matrix_mini> &c, ForwardI first, ForwardI last, vector<unsigned long> &offsets) {
01598 for(; first != last && c.forward(*first); ++first);
01599 if (first == last && c.forward('\0') && c.src_final()) {
01600 offsets = c.src_tag();
01601 return true;
01602 }
01603 return false;
01604 }
01605
01606 #if 0
01607 template <typename ForwardI>
01608 inline
01609 ForwardI first_match(regexp_capture_cursor &c, ForwardI first, ForwardI last)
01610 {
01611
01612 const ForwardI start = first;
01613 for(regexp_capture_cursor::state_type::const_iterator i = c.src().begin(); i != c.src().end(); ++i) {
01614 cout << "< " << i->first << ':' << *i->first << ", { ";
01615 for(regexp_capture_cursor::configuration::const_iterator j = i->second.begin(); j != i->second.end(); ++j)
01616 cout << (int) *j << " ";
01617 cout << "} > ";
01618 }
01619 cout << endl;
01620 for(; first != last && c.forward(*first); ++first) {
01621 for(regexp_capture_cursor::state_type::const_iterator i = c.src().begin(); i != c.src().end(); ++i) {
01622 cout << "< " << i->first << ':' << *i->first << ", { ";
01623 for(regexp_capture_cursor::configuration::const_iterator j = i->second.begin(); j != i->second.end(); ++j)
01624 cout << (int) *j << " ";
01625 cout << "} > ";
01626 }
01627 cout << endl;
01628 if (c.src_final()) return ++first;
01629 }
01630 return first == last && c.forward('\0') && c.src_final() ? first : start;
01631 }
01632
01633 #endif
01634
01635
01636 }
01637
01638 #endif