token_stream_t convert_buffer_to_token_stream(char* buffer, size_t size) { //in this function convert buffer to the token stream, which is a linked list token_t head_token = create_token_with_type(HEAD, NULL, 0);//which created the dummy token token_t curr_token = head_token; token_t prev_token = NULL; token_stream_t head_stream = (token_stream_t)malloc(sizeof(token_stream_t)); token_stream_t curr_stream = head_stream; curr_stream->head = head_token; curr_stream->tail = head_token;//now the head and tail both points to the head_token int line = 1; int index = 0; char c = *buffer; while(index<(int)size) { printf("index=%d, size = %d",(int)index,(int)size); if(isword(c) )//if c is word { size_t word_length = 5 ; size_t word_position = 0; char *word = (char*)malloc(sizeof(word_length)); while(isword(c)) { word[word_position] = c;//assign the c to corresponding position printf("test convert_buffer_to_token_stream1\n"); printf("c=%c \n",c); if(word_position == word_length)//if reaches the length { word_length*=2; word = (char*)realloc(word,word_length);//resize the word length } word_position++;index++;buffer++;c = *buffer;//update the char and index } token_t now = create_token_with_type(WORD,word,line); curr_token ->next = now; prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; } else if(c == '\n')//if c is the new line { line++; if(curr_token->t_type ==LEFT_DERCTION||curr_token->t_type ==RIGHT_DERCTION) //LEFT direction and right direction cannot be followed by new line { //error(2, 0, "Line %d: Newline cannot follow redirects.", line); return NULL; } //if the current token type is word or subshell which indicated a new command index++; if (index == size) { printf("break!" 
); break; } else { buffer++;c = *buffer;} if(curr_token->t_type==WORD||curr_token->t_type==SUBSHELL) {//create a new token stream curr_stream->next = (token_stream_t)malloc(sizeof(token_stream_t)); curr_stream = curr_stream->next; curr_stream->head = create_token_with_type(HEAD, NULL, -1); curr_token = curr_stream->head; } //else just treat is as white space printf("test convert_buffer_to_token_stream2 \n"); printf("c=%c \n",c); } else if(c == ' ' || c =='\t') { index++;buffer++;c = *buffer; //treated it as white space } else if(c == '<')//left direction { curr_token->next = create_token_with_type(LEFT_DERCTION,NULL,line); prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; printf("test convert_buffer_to_token_stream3 \n"); printf("c=%c \n",c); buffer++; index++; c = *buffer; } else if (c == '>') // RIGHT REDIRECT { curr_token->next = create_token_with_type(RIGHT_DERCTION, NULL, line); prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; printf("test convert_buffer_to_token_stream3 \n"); printf("c=%c \n",c); buffer++; index++; c = *buffer; } else if (c == ';') // SEQUENCE command { curr_token->next = create_token_with_type(SEMICOLON, NULL, line); prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; printf("test convert_buffer_to_token_stream3 \n"); printf("c=%c \n",c); buffer++; index++; c = *buffer; } else if (c == '&') // and { printf("test convert_buffer_to_token_stream4 \n"); printf("c=%c \n",c); buffer++; //my code index++; c=*buffer; if(buffer[0]!='&') { //error(2, 0, "Line %d: Syntax error. 
& must be followed by &", line); return NULL; } else if(buffer[0]=='&') { curr_token->next = create_token_with_type(AND, NULL, line); prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; printf("test convert_buffer_to_token_stream4 \n"); printf("c=%c \n",c); buffer++; index++; c = *buffer; } } else if (c == '|') // OR or PIPELINE { printf("test convert_buffer_to_token_stream4 \n"); printf("c=%c \n",c); buffer++; index++; c=*buffer; if(buffer[0]==' ') { curr_token->next = create_token_with_type(PIPELINE, NULL, line); prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; //buffer++; index++; c = *buffer; } else if(buffer[0]=='|') { curr_token->next = create_token_with_type(OR, NULL, line); prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; printf("test convert_buffer_to_token_stream5 \n"); printf("c=%c \n",c); buffer++; index++; c = *buffer; } /* else { error(2, 0, "Line %d: Syntax error. | only can be followed by | or ", line); return NULL; }*/ } if (c == '(') // SUBSHELL { int subshell_line = line; int nested = 1; size_t count = 0; size_t subshell_size = 64; char* subshell = (char*)malloc(subshell_size); // grab contents until subshell is closed while (nested > 0) { buffer++; index++; c = *buffer; //to examine the next char if (index == size) { //error(2, 0, "Line %d: Syntax error. 
EOF reached before subshell was closed.", subshell_line); return NULL; } if (c == '\n') { // consume all following whitespace while (buffer[1] == ' ' || buffer[1] == '\t' || buffer[1] == '\n') { if (buffer[1] == '\n') line++; buffer++; index++; } // pass semicolon c = ';'; line++; } else if (c == '(') // count for nested subshells nested++; else if (c == ')') // close subshell { nested--; if (nested == 0) // break if outermost subshell is closed { buffer++; index++; c = *buffer; // consume last close parens break; } } printf("test convert_buffer_to_token_stream6 \n"); printf("c=%c \n",c); // load into subshell buffer subshell[count] = c; count++; // expand subshell buffer if necessary if (count == subshell_size) { subshell_size = subshell_size * 2; subshell = (char*)realloc (subshell, subshell_size); } printf("test convert_buffer_to_token_stream7 \n"); printf("c=%c \n",c); } // create subshell token curr_token->next = create_token_with_type(SUBSHELL, subshell, subshell_line); // curr_token = curr_token->next; prev_token = curr_token;//assign the previous token to now curr_token = curr_token->next; curr_token->prev = prev_token; prev_token->next = curr_token; curr_token->next = NULL; } else if (c == ')') // CLOSE PARENS { //error(2, 0, "Line %d: Syntax error. Close parens found without matching open parens.", line); return NULL; } } printf("return headstream"); return head_stream; }
/*
 * Decide whether `play` may be placed on the board.
 *
 * Returns a null pointer when the play is acceptable; otherwise returns a
 * message describing the first problem found. The message is either a string
 * literal or this function's static buffer, so it must not be freed and is
 * overwritten by the next call that formats a message.
 */
char*
valid(Play *play)
{
	static char buf[LLEN];
	Pos p;
	Pos beyond;		/* square just past play->pos in the play's direction */
	int idx;
	char letter;
	Bool placed_new = false;	/* at least one letter lands on an empty square */
	Bool on_centre = false;		/* the word covers square (8,8) */
	Bool anchored = false;		/* the word covers at least one anchor square */

	if(DBG) {
		print("assert (%d,%d,%c)", play->pos.x, play->pos.y, play->o == LR ? 'H' : 'V');
		wordprint(&play->word);
	}

	beyond = NEXT(play->pos, play->o);
	if(HASLETTER(beyond))
		return "abuts another word\n";

	if(!isword(root, &play->word))
		return "not a word";

	/* Walk the word from its last letter to its first, stepping backwards
	 * across the board from play->pos. */
	p = play->pos;
	idx = play->word.n - 1;
	while(idx >= 0) {
		if(p.x < 0 || p.y < 0 || p.x > BLEN || p.y > BLEN)
			return "off the edge";

		letter = play->word.c[idx];

		if(ISANCHOR(p))
			anchored = true;

		if(HASLETTER(p)) {
			/* An occupied square must already hold the same letter. */
			if(LETTER(p) != letter) {
				sprintf(buf,"wanted %c, got %c at (%d,%d)", letter+'a', LETTER(p)+'a', p.x, p.y);
				return buf;
			}
		} else {
			placed_new = true;
			/* Except on the first move, a new letter must be allowed by the
			 * cross-check set of the orthogonal direction. */
			if(!firstmove && !(CROSS(p, ORTHO(play->o)) & (1 << letter))) {
				sprintf(buf,"invalid cross word at (%d,%d)",p.x,p.y);
				return buf;
			}
		}

		if(p.x == 8 && p.y == 8)
			on_centre = true;

		p = PREV(p, play->o);
		idx--;
	}

	if(firstmove) {
		DPRINT("FIRSTMOVE\n");
		if(!on_centre)
			return ("first move doesn't touch centre square");
	}

	if(!anchored && !firstmove)
		return ("not attached to another word");

	/* p is now the square just before the first letter. */
	if(HASLETTER(p))
		return "abutting another word";

	if(!placed_new)
		return "adds no letters";

	return (char*)0;
}
int main() { int i = 0 ,j = 0, k = 0 , l = 0; int curr_page_num = 0; char line[100]; for( i = 0; i < 10000; i++) { words[i].str = malloc(100); words[i].dup_str = malloc(100); words[i].idx = malloc(50); words[i].num_idx = 0; } while(1) { if( fgets(line,100,stdin) == NULL ) break; int pagenumberFLAG = 1; line[strlen(line)-1] = '\0'; for(i = 0; i < strlen(line); i++) { if( !isdigit(line[i]) ) pagenumberFLAG = 0; } // printf("pagenumberFLAG = %d\n",pagenumberFLAG); if( pagenumberFLAG == 1) { curr_page_num = atoi(line); continue; } // puts(line); // printf("len of line = %lu\n",strlen(line)); i = 0; for(; i < strlen(line); ) { // printf("i* = %d\n",i); if(line[i] == ' ' ) { i++; continue; } char temp_str[100]; k = 0; while(line[i] != ' ') { if(line[i] == '\0') break; // printf("line[%d] = %c\n",i,line[i]); temp_str[k++] = line[i]; i++; } temp_str[k] = '\0'; // printf("i = %d\n",i); // printf("temp_str = %s\n",temp_str); // printf("len_temp_str = %lu\n",strlen(temp_str)); // now compare this temp_str with all the existing strings in words[] int flag_str = 0; // printf("num_words = %d\n",num_words); int la = 0; for(;temp_str[la];la++)temp_str[la] = tolower(temp_str[la]); for(j = 0;j < num_words; j++) { if( strcmp(words[j].str,temp_str) == 0 ) { flag_str = 1; if( words[j].idx[words[j].num_idx-1] != curr_page_num ) { words[j].idx[words[j].num_idx] = curr_page_num;// index updated words[j].num_idx++; } break; } } if(flag_str == 0 ) { if( isNumeric( temp_str ) ) continue; if( !isword(temp_str)) continue; strcpy(words[num_words].str,temp_str); // str updated words[num_words].idx[0] = curr_page_num;// index updated words[num_words].num_idx++; num_words++; } } } for(i = 0; i < num_words; i++) strcpy(words[i].dup_str,words[i].str); // puts("PRINTITITITITITITI\n\n\n"); // print_words(); radix_sort(); // puts("\n\nFINAL ANS"); print_words(); /********* input is done *******/ return 0; }
int main(int argc, char *argv[]) { static char *desc[] = { "[TT]hrefify[tt] adds href's for all the words in the input file which are not", "already hyperlinked and which appear in the file specified with the", "option [TT]-l[tt].[PAR]", "If the href's should call a script, text can be added", "with the [TT]-t[tt] option." }; int n; char **text,**str,line[1024],*ptr,*ref, start[STRLEN],word[STRLEN],end[STRLEN]; int n_text,n_str,i_str; gmx_bool bInHREF,bIn; FILE *fp; char title[STRLEN]; int i,l,n_repl; t_filenm fnm[] = { { efDAT, "-l", "links", ffLIBRD }, }; #define NFILE asize(fnm) static char *in=NULL,*out=NULL,*excl=NULL,*link_text=NULL; static gmx_bool peratom=FALSE; t_pargs pa[] = { { "-f", FALSE, etSTR, { &in } , "HTML input" }, { "-o", FALSE, etSTR, { &out } , "HTML output" }, { "-e", FALSE, etSTR, { &excl } , "Exclude a string from HREF's, " "when this option is not set, the filename without path and extension " "will be excluded from HREF's"}, { "-t", FALSE, etSTR, { &link_text } , "Insert a string in front of the " "href file name, useful for scripts" } }; CopyRight(stderr,argv[0]); parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa, asize(desc),desc,0,NULL); if (!in || !out) gmx_fatal(FARGS,"Input or output filename is not set"); n_text = get_file(in, &text); fprintf(stderr,"Read %d lines from %s\n",n_text,in); n_str=get_file(ftp2fn(efDAT,NFILE,fnm),&str); fprintf(stderr,"Read %d strings %s\n",n_str,ftp2fn(efDAT,NFILE,fnm)); if (!excl) { for (i=strlen(in)-1; i>0 && in[i-1]!='/'; i--); excl=strdup(in+i); for(i=strlen(excl)-1; i>0 && (excl[i]!='.'); i--); if (excl[i]=='.') excl[i]='\0'; } fprintf(stderr,"Excluding '%s' from references\n",excl); for(l=0; l<n_str && strcasecmp(str[l],excl); l++); if (l<n_str) { for(i=l+1; i<n_str; i++) str[i-1]=str[i]; n_str--; } if (!link_text) link_text=strdup("\0"); else fprintf(stderr,"Adding '%s' to href's\n",link_text); fp=gmx_ffopen(out,"w"); n_repl=0; i_str=-1; bInHREF=FALSE; for(l=0; l<n_text; l++) { 
strcpy(line,text[l]); do { bIn=bInHREF; ptr=strstr_href(line,&bIn,&i_str,n_str,str); if (ptr) { ref=ptr; if ((ref!=line) && (ref[-1]=='.')) { ref--; while((ref>line) && isword(ref[-1])) ref--; } strcpy(start,line); start[ref-line]='\0'; strcpy(word,ref); word[ptr-ref+strlen(str[i_str])]='\0'; strcpy(end,ptr+strlen(str[i_str])); sprintf(line,"%s<a href=\"%s%s.html\">%s</a>%s", start,link_text,str[i_str],word,end); fprintf(stderr,"line %d: %s\n",l+1,str[i_str]); n_repl++; } } while (ptr); bInHREF=bIn; fprintf(fp,"%s\n",line); } gmx_ffclose(fp); fprintf(stderr,"Added %d HTML references\n",n_repl); return 0; }
unsigned vxp_fixed_token(const char *p, const char **q) { switch (p[0]) { case '!': if (p[1] == '=' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_NEQ); } if (p[1] == '~' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_NOMATCH); } return (0); case '(': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return ('('); } return (0); case ')': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return (')'); } return (0); case ',': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return (','); } return (0); case ':': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return (':'); } return (0); case '<': if (p[1] == '=' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_LEQ); } if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return ('<'); } return (0); case '=': if (p[1] == '=' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_EQ); } return (0); case '>': if (p[1] == '=' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_GEQ); } if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return ('>'); } return (0); case '[': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return ('['); } return (0); case ']': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return (']'); } return (0); case 'a': if (p[1] == 'n' && p[2] == 'd' && (isword(p[2]) ? !isword(p[3]) : 1)) { *q = p + 3; return (T_AND); } return (0); case 'e': if (p[1] == 'q' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_SEQ); } return (0); case 'n': if (p[1] == 'o' && p[2] == 't' && (isword(p[2]) ? !isword(p[3]) : 1)) { *q = p + 3; return (T_NOT); } if (p[1] == 'e' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_SNEQ); } return (0); case 'o': if (p[1] == 'r' && (isword(p[1]) ? !isword(p[2]) : 1)) { *q = p + 2; return (T_OR); } return (0); case '{': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return ('{'); } return (0); case '}': if ((isword(p[0]) ? 
!isword(p[1]) : 1)) { *q = p + 1; return ('}'); } return (0); case '~': if ((isword(p[0]) ? !isword(p[1]) : 1)) { *q = p + 1; return ('~'); } return (0); default: return (0); } }
/// Run one matching step over the buffered input, starting at offset cur_.
///
/// The pattern is executed either by calling the function pat_->fsm_ (when it
/// is set) or by interpreting the opcode table pat_->opc_. Afterwards the
/// indent stops in tab_ and the pending dedent counter ded_ are updated, and
/// the result is post-processed according to the method.
///
/// @param method  matching mode; this function compares it against
///                Const::SPLIT, Const::MATCH and Const::FIND.
/// @returns cap_: 0 when nothing was accepted, Const::EMPTY in the SPLIT
///          end-of-input cases handled below, otherwise the value last stored
///          in cap_ by the pattern execution.
size_t Matcher::match(Method method)
{
  DBGLOG("BEGIN Matcher::match()");
  // Presumably puts back the character that was overwritten by the '\0' this
  // function stores at buf_[pos_] before returning; chr_ is maintained outside
  // this function.
  if (pos_ < end_)
    buf_[pos_] = chr_;
scan:
  // (Re)start a match attempt at the current position.
  txt_ = buf_ + cur_;
  len_ = 0;
  bool bob = at_bob();
  // Input exhausted with no dedents pending and no indent stops left.
  if (hit_end() && ded_ == 0 && tab_.empty())
  {
    // SPLIT reports one final Const::EMPTY after a previous non-empty result.
    if (method == Const::SPLIT && !bob && cap_ != 0 && cap_ != Const::EMPTY)
    {
      cap_ = Const::EMPTY;
      buf_[pos_] = '\0';
      DBGLOG("Split empty at end, cap = %zu", cap_);
      DBGLOG("END Matcher::match()");
      return cap_;
    }
    cap_ = 0;
    DBGLOG("END Matcher::match()");
    return 0;
  }
  bool bol = bob || at_bol();
  bool bow;
  bool eow;
  // Word-boundary flags at the start position, derived from the previously
  // read character (got_) and the next character (peek()).
  int c1 = got_;
  if (isword(c1))
  {
    bow = false;
    eow = !isword(peek());
  }
  else
  {
    bow = isword(peek());
    eow = false;
  }
  ind_ = pos_; // ind scans input in buf[] in newline() up to pos - 1
  size_t col = 0; // count columns from BOL
  // Hand the start-of-match state to the FSM function, if there is one.
  if (pat_->fsm_)
  {
    fsm_.bob = bob;
    fsm_.bow = bow;
    fsm_.eow = eow;
    fsm_.col = col;
    fsm_.c1 = c1;
  }
redo:
  // Execute the pattern from the current input position.
  cap_ = 0;
  lap_.resize(0);
  bool nul = method == Const::MATCH;
  if (pat_->fsm_)
  {
    DBGLOG("FSM code %p", pat_->fsm_);
    fsm_.bol = bol;
    fsm_.nul = nul;
    pat_->fsm_(*this);
    // Copy back the state the FSM function may have changed.
    col = fsm_.col;
    nul = fsm_.nul;
    c1 = fsm_.c1;
  }
  else if (pat_->opc_)
  {
    const Pattern::Opcode *pc = pat_->opc_;
    // Opcode interpreter: each iteration handles the opcodes at pc, reads one
    // input character and follows the transition for it.
    while (true)
    {
      Pattern::Opcode opcode = *pc;
      DBGLOG("Fetch: code[%zu] = 0x%08X", pc - pat_->opc_, opcode);
      Pattern::Index index;
      // Opcodes that are acted on before a character is read.
      switch (opcode >> 16)
      {
        case 0x00ff: // check if HALT
          if (Pattern::is_opcode_halt(opcode))
            goto done;
          break;
        case 0xff00: // TAKE
          cap_ = Pattern::index_of(opcode);
          DBGLOG("Take: cap = %zu", cap_);
          cur_ = pos_;
          ++pc;
          continue;
        case 0xff7e: // TAIL
          index = Pattern::index_of(opcode);
          DBGLOG("Tail: %u", index);
          if (lap_.size() > index && lap_[index] >= 0)
            cur_ = txt_ - buf_ + static_cast<size_t>(lap_[index]); // mind the (new) gap
          ++pc;
          continue;
        case 0xff7f: // HEAD
          index = Pattern::index_of(opcode);
          DBGLOG("Head: lookahead[%u] = %zu", index, pos_ - (txt_ - buf_));
          if (lap_.size() <= index)
            lap_.resize(index + 1, -1);
          lap_[index] = static_cast<int>(pos_ - (txt_ - buf_)); // mind the gap
          ++pc;
          continue;
        case 0xff00 | Pattern::META_DED:
          if (ded_ > 0)
          {
            index = Pattern::index_of(opcode);
            DBGLOG("Dedent ded = %zu", ded_);
            // unconditional dedent matching \j
            nul = true;
            pc = pat_->opc_ + index;
            continue;
          }
      }
      // c0 is the previous character, c1 the one just read.
      int c0 = c1;
      if (c0 == EOF)
        break;
      c1 = get();
      DBGLOG("Get: c1 = %d", c1);
      index = Pattern::IMAX;
      Pattern::Index back = Pattern::IMAX; // where to jump back to (backtrack on meta transitions)
      Pattern::Index la;
      // Walk the meta opcodes at pc; the first one whose condition holds
      // supplies a jump target in index.
      while (true)
      {
        if (index == Pattern::IMAX || back == Pattern::IMAX) // we no longer have to pass through all if index and back are set
        {
          switch (opcode >> 16)
          {
            case 0xff00: // TAKE
              cap_ = Pattern::index_of(opcode);
              DBGLOG("Take: cap = %zu", cap_);
              cur_ = pos_;
              if (c1 != EOF)
                --cur_; // Must unget one char
              opcode = *++pc;
              continue;
            case 0xff7e: // TAIL
              la = Pattern::index_of(opcode);
              DBGLOG("Tail: %u", la);
              if (lap_.size() > la && lap_[la] >= 0)
                cur_ = txt_ - buf_ + static_cast<size_t>(lap_[la]); // mind the (new) gap
              opcode = *++pc;
              continue;
            case 0xff7f: // HEAD
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_DED:
              DBGLOG("DED? %d", c1);
              if (index == Pattern::IMAX && bol && dedent(col))
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_IND:
              DBGLOG("IND? %d", c1);
              if (index == Pattern::IMAX && bol && indent(col))
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_EOB:
              DBGLOG("EOB? %d", c1);
              if (index == Pattern::IMAX && c1 == EOF)
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_BOB:
              DBGLOG("BOB? %d", bob);
              if (index == Pattern::IMAX && bob)
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_EOL:
              DBGLOG("EOL? %d", c1);
              if (index == Pattern::IMAX && (c1 == EOF || c1 == '\n'))
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_BOL:
              DBGLOG("BOL? %d", bol);
              if (index == Pattern::IMAX && bol)
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_EWE:
              // taken when c0 is a word character and c1 is not
              DBGLOG("EWE? %d %d %d", c0, c1, isword(c0) && !isword(c1));
              if (index == Pattern::IMAX && isword(c0) && !isword(c1))
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_BWE:
              // taken when c0 is not a word character and c1 is
              DBGLOG("BWE? %d %d %d", c0, c1, !isword(c0) && isword(c1));
              if (index == Pattern::IMAX && !isword(c0) && isword(c1))
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_EWB:
              DBGLOG("EWB? %d", eow);
              if (index == Pattern::IMAX && eow)
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_BWB:
              DBGLOG("BWB? %d", bow);
              if (index == Pattern::IMAX && bow)
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_NWE:
              // taken when c0 and c1 are both word or both non-word characters
              DBGLOG("NWE? %d %d %d", c0, c1, isword(c0) == isword(c1));
              if (index == Pattern::IMAX && isword(c0) == isword(c1))
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
            case 0xff00 | Pattern::META_NWB:
              // taken when neither bow nor eow is set
              DBGLOG("NWE? %d %d", bow, eow);
              if (index == Pattern::IMAX && !bow && !eow)
                index = Pattern::index_of(opcode);
              opcode = *++pc;
              continue;
          }
        }
        if (index != Pattern::IMAX)
        {
          // A meta condition held: remember where we were (first time only)
          // and continue at the meta transition's target.
          DBGLOG("Backtrack: pc = %u", index);
          if (back == Pattern::IMAX)
            back = static_cast<Pattern::Index>(pc - pat_->opc_);
          pc = pat_->opc_ + index;
          opcode = *pc;
          index = Pattern::IMAX;
        }
        else
        {
          // No (further) meta transition: return to the remembered position,
          // if any, and go on with the character transition.
          if (back != Pattern::IMAX)
          {
            pc = pat_->opc_ + back;
            opcode = *pc;
          }
          break;
        }
      }
      if (c1 == EOF)
        break;
      // Skip forward to the opcode selected by the range test on c1 below.
      Pattern::Opcode lo = c1 << 24;
      Pattern::Opcode hi = lo | 0x00ffffff;
      while (hi < opcode || lo > (opcode << 8))
        opcode = *++pc;
      index = Pattern::index_of(opcode);
      // IMAX as target means there is no transition: stop interpreting.
      if (index == Pattern::IMAX)
        break;
      pc = pat_->opc_ + index;
    }
  }
done:
  // Update the indent stops in tab_ from the column reached at line start.
  if (bol && cap_ != Const::EMPTY)
  {
    if (col > 0 && (tab_.empty() || tab_.back() < col))
    {
      DBGLOG("Set new stop: tab_[%zu] = %zu", tab_.size(), col);
      tab_.push_back(col);
    }
    else if (!tab_.empty() && tab_.back() > col)
    {
      // Find how many stops remain at or below col; every stop dropped adds
      // one pending dedent.
      size_t n;
      for (n = tab_.size() - 1; n > 0; --n)
        if (tab_.at(n - 1) <= col)
          break;
      ded_ += tab_.size() - n;
      DBGLOG("Dedents: ded = %zu tab_ = %zu", ded_, tab_.size());
      tab_.resize(n);
      if (n > 0)
        tab_.back() = col; // adjust stop when indents are not aligned (Python would give an error)
    }
  }
  if (ded_ > 0)
  {
    DBGLOG("Dedents: ded = %zu", ded_);
    if (col == 0 && bol)
    {
      // Back at column 0: all remaining stops become pending dedents and the
      // input is rescanned from ind_.
      ded_ += tab_.size();
      tab_.resize(0);
      DBGLOG("Rescan for pending dedents: ded = %zu", ded_);
      pos_ = ind_;
      bol = false; // avoid looping, match \j exactly
      goto redo;
    }
    --ded_;
  }
  if (method == Const::SPLIT)
  {
    DBGLOG("Split: len = %zu cap = %zu cur = %zu pos = %zu end = %zu txt-buf = %zu eob = %d", len_, cap_, cur_, pos_, end_, txt_-buf_, (int)eof_);
    // No match here (or an empty one not at the very beginning): grow the
    // split text by one character and retry, or finish at end of input.
    if (cap_ == 0 || (cur_ == static_cast<size_t>(txt_ - buf_) && !bob))
    {
      if (!hit_end())
      {
        ++len_;
        DBGLOG("Split continue: len = %zu", len_);
        set_current(++cur_);
        goto redo;
      }
      cap_ = Const::EMPTY;
      set_current(pos_); // chr_ = static_cast<unsigned char>(buf_[pos_]);
      buf_[pos_] = '\0';
      DBGLOG("Split at eof: cap = %zu txt = '%s' len = %zu", cap_, txt_, len_);
      DBGLOG("END Matcher::match()");
      return cap_;
    }
    if (bob && cur_ == 0 && hit_end())
      cap_ = Const::EMPTY;
    set_current(cur_);
    // Terminate the split text, which is len_ characters long from txt_.
    buf_[txt_ - buf_ + len_] = '\0';
    DBGLOG("Split: txt = '%s' len = %zu", txt_, len_);
    DBGLOG("END Matcher::match()");
    return cap_;
  }
  // Length of the matched text: from txt_ up to cur_.
  len_ = cur_ - (txt_ - buf_);
  if (len_ == 0 && !nul)
  {
    // Empty match where empty matches are not allowed.
    DBGLOG("Empty match cur = %zu pos = %zu end = %zu", cur_, pos_, end_);
    pos_ = cur_;
    if (hit_end())
    {
      set_current(cur_);
      DBGLOG("Reject empty match at EOF");
      cap_ = 0;
    }
    else if (method == Const::FIND)
    {
      set_current(++cur_); // skip one unrecognized char
      DBGLOG("Reject and continue?");
      if (cap_ == 0 || !opt_.N)
        goto scan;
      DBGLOG("Accept empty match");
      buf_[pos_ - 1] = '\0';
    }
    else
    {
      set_current(cur_);
      DBGLOG("Reject empty match");
      cap_ = 0;
    }
  }
  else
  {
    if (cur_ == end_ && len_ == 0)
    {
      DBGLOG("Hit end: got = %d", got_);
      if (cap_ == Const::EMPTY && !opt_.A)
        cap_ = 0; // cannot goto scan?
    }
    else
    {
      set_current(cur_);
      if (len_ > 0)
      {
        // A Const::EMPTY accept is ignored unless opt_.A is set: rescan, or
        // report no match when the method is MATCH.
        if (cap_ == Const::EMPTY && !opt_.A)
        {
          DBGLOG("Ignore accept and continue: len = %zu", len_);
          if (method != Const::MATCH)
            goto scan;
          cap_ = 0;
        }
      }
    }
  }
  // Terminate the text at pos_ before returning.
  buf_[pos_] = '\0';
  DBGLOG("Return: cap = %zu txt = '%s' len = %zu got = %d", cap_, txt_, len_, got_);
  DBGLOG("END match()");
  return cap_;
}