void start_parsing() { int i = 0; int n = 0; const char *p_start = NULL, *s_start = NULL; int p_len = 0, s_len = 0; while (this->buff[i]) { char ch = this->buff[i]; DCERR("State: "<<this->state<<", read: "<<ch<<"\n"); switch (this->state) { case ILP_BEFORE_NON_WS: if (!isspace(ch)) { this->state = ILP_WEIGHT; } else { ++i; } break; case ILP_WEIGHT: if (isdigit(ch)) { n *= 10; n += (ch - '0'); ++i; } else { this->state = ILP_BEFORE_PTAB; on_weight(n); } break; case ILP_BEFORE_PTAB: if (ch == '\t') { this->state = ILP_AFTER_PTAB; } ++i; break; case ILP_AFTER_PTAB: if (isspace(ch)) { ++i; } else { p_start = this->buff + i; this->state = ILP_PHRASE; } break; case ILP_PHRASE: // DCERR("State: ILP_PHRASE: "<<buff[i]<<endl); if (ch != '\t') { ++p_len; } else { // Note: Skip to ILP_SNIPPET since the snippet may // start with a white-space that we wish to // preserve. // // this->state = ILP_AFTER_STAB; this->state = ILP_SNIPPET; s_start = this->buff + i + 1; } ++i; break; case ILP_AFTER_STAB: if (isspace(ch)) { this->state = ILP_SNIPPET; s_start = this->buff + i; } else { ++i; } break; case ILP_SNIPPET: ++i; ++s_len; break; }; } on_phrase(p_start, p_len); on_snippet(s_start, s_len); }
void start_parsing() { int i = 0; // The current record byte-offset. int n = 0; // Temporary buffer for numeric (integer) fields. const char *p_start = NULL; // Beginning of the phrase. const char *s_start = NULL; // Beginning of the snippet. int p_len = 0; // Phrase Length. int s_len = 0; // Snippet length. while (this->buff[i]) { char ch = this->buff[i]; DCERR("["<<this->state<<":"<<ch<<"]"); switch (this->state) { case ILP_BEFORE_NON_WS: if (!isspace(ch)) { this->state = ILP_WEIGHT; } else { ++i; } break; case ILP_WEIGHT: if (isdigit(ch)) { n *= 10; n += (ch - '0'); ++i; } else { this->state = ILP_BEFORE_PTAB; on_weight(n); } break; case ILP_BEFORE_PTAB: if (ch == '\t') { this->state = ILP_AFTER_PTAB; } ++i; break; case ILP_AFTER_PTAB: if (isspace(ch)) { ++i; } else { p_start = this->buff + i; this->state = ILP_PHRASE; } break; case ILP_PHRASE: // DCERR("State: ILP_PHRASE: "<<buff[i]<<endl); if (ch != '\t') { ++p_len; } else { // Note: Skip to ILP_SNIPPET since the snippet may // start with a white-space that we wish to // preserve. // this->state = ILP_SNIPPET; s_start = this->buff + i + 1; } ++i; break; case ILP_SNIPPET: ++i; ++s_len; break; }; } DCERR("\n"); on_phrase(p_start, p_len); on_snippet(s_start, s_len); }