/**
 * Driver for the "Where's Waldorf" word search.
 *
 * Reads the number of test cases, then for each case an m x n grid followed by
 * a list of words. For every word it scans the grid in row-major order and
 * prints the 1-based row and column of the first cell for which
 * isWord(word, length, row, col, dy, dx) succeeds for one of the eight
 * direction vectors. Consecutive test cases are separated by a blank line.
 *
 * Input is taken from stdin, or from input/WheresWaldorf.in when `debug` is
 * defined at compile time.
 *
 * NOTE(review): grid rows are still read with an unbounded "%s" because the
 * row capacity of `grid` is not visible from this function.
 */
int main(){
    FILE * f;
    f = stdin;
#ifdef debug
    f = fopen("input/WheresWaldorf.in", "r");
    if(f == NULL){
        // Without this check every fscanf below would dereference NULL.
        perror("input/WheresWaldorf.in");
        return 1;
    }
#endif
    fscanf(f, "%d", &T);
    while(T--){
        fscanf(f, "%d %d", &m, &n);
        for(int i = 0 ; i < m ; i++){
            fscanf(f, "%s", grid[i]);
        }
        fscanf(f, "%d", &words);

        // The eight neighbour offsets: four axis-aligned, four diagonal.
        int dir_x[] = {1,-1, 0, 0, 1,-1,-1, 1};
        int dir_y[] = {0, 0, 1,-1, 1, 1,-1,-1};
        char word[100];

        for(int w = 0 ; w < words ; w++){
            // Field width keeps an over-long token from overflowing `word`.
            if(fscanf(f, "%99s", word) != 1){
                break;
            }
            // Loop invariant: measure the word once instead of per cell/direction.
            const size_t length = strlen(word);

            bool found = false;
            for(int i = 0 ; i < m && !found ; i++){
                for(int j = 0 ; j < n && !found ; j++){
                    for(int dir = 0 ; dir < 8 ; dir++){
                        if(isWord(word, length, i, j, dir_y[dir], dir_x[dir])){
                            printf("%d %d\n", i + 1, j + 1);
                            found = true;   // report only the first hit
                            break;
                        }
                    }
                }
            }
        }
        if(T)printf("\n");
    }
    return 0;
}
std::string addSpaces(std::string text) { std::string spaced = ""; std::string buffer = ""; unsigned processed = 0; while (processed < text.size()) { int max = 0; int word_len = 0; for (unsigned i = 15; i > 0; --i) { if(i + processed > text.size()) continue; if (isWord(text.substr(processed, i).c_str())) { int lookaheadCount = getLookaheadCount(text, i + processed, 0); if (lookaheadCount == MAX_LOOKAHEAD) { word_len = i; break; } else if(lookaheadCount > max) { word_len = i; max = lookaheadCount; } } } if(word_len == 0) { buffer += text[processed]; word_len = 1; } else { if(buffer.size() > 0) { spaced += buffer + " "; buffer = ""; } spaced += text.substr(processed, word_len) + " "; } processed += word_len; } spaced += buffer; return spaced; }
void WordList::wordsInHelper(const QString &given, const QString &left, QStringList &ret) { for(int i=0; i<left.length(); i++) { QString part = given + left[i]; if(isPartialWord(part)) { if(isWord(part)) { if(ret.contains(part)) return;//We've hit a duplicate letter arrangement ret << part; } QString nowLeft(left); nowLeft.remove(i,1); wordsInHelper(part, nowLeft, ret); } } }
// Computes the segmentations of `s` into dictionary words.
//
// reachable[k] is true when the prefix s[0, k) can be split into words.
// cuts[k][j] is true when s[j, k) is a word and s[0, j) is itself reachable.
// The table of cuts is then handed to get_path(), which fills the result.
vector<string> wordBreak(string s, unordered_set<string> &dict) {
    const int total = s.size();

    vector<bool> reachable(s.size() + 1, false);
    vector<vector<bool> > cuts(s.size() + 1, vector<bool>(s.size() + 1, false));
    reachable[0] = true;

    for (int end = 1; end <= total; ++end) {
        for (int begin = end - 1; begin >= 0; --begin) {
            if (!reachable[begin])
                continue;
            if (!isWord(s.substr(begin, end - begin), dict))
                continue;

            reachable[end] = true;
            cuts[end][begin] = true;
        }
    }

    vector<string> results;
    vector<string> current;
    get_path(s, cuts, s.size(), current, results);
    return results;
}
//////////////////////////////////////////////////////////////////////////////// // When a Lexer object is constructed with a string, this method walks through // the stream of low-level tokens. bool Lexer::token (std::string& token, Lexer::Type& type) { // Eat white space. while (isWhitespace (_text[_cursor])) utf8_next_char (_text, _cursor); // Terminate at EOS. if (isEOS ()) return false; // The sequence is specific, and must follow these rules: // - date < duration < uuid < identifier // - dom < uuid // - uuid < hex < number // - url < pair < identifier // - hex < number // - separator < tag < operator // - path < substitution < pattern // - set < number // - word last if (isString (token, type, "'\"") || isDate (token, type) || isDuration (token, type) || isURL (token, type) || isPair (token, type) || isUUID (token, type, true) || isSet (token, type) || isDOM (token, type) || isHexNumber (token, type) || isNumber (token, type) || isSeparator (token, type) || isTag (token, type) || isPath (token, type) || isSubstitution (token, type) || isPattern (token, type) || isOperator (token, type) || isIdentifier (token, type) || isWord (token, type)) return true; return false; }
/**
 * Scores how far a chain of consecutive words can be followed from `start`.
 *
 * For every candidate length from 15 down to 1 that fits in `text` and is
 * accepted by isWord(), the function recurses just past that candidate and
 * keeps the largest result.
 *
 * @param text   Text being segmented.
 * @param start  Offset at which the next word would begin.
 * @param depth  Current recursion depth; recursion stops at MAX_LOOKAHEAD.
 * @return 0 when `depth` equals MAX_LOOKAHEAD, otherwise one more than the
 *         best score among the accepted candidates (1 if there are none).
 */
int getLookaheadCount(const std::string& text, unsigned start, int depth)
{
    if (depth == MAX_LOOKAHEAD)
        return 0;

    int best = 0;
    unsigned len = 15;
    while (len > 0) {
        if (len + start <= text.size() && isWord(text.substr(start, len).c_str())) {
            const int score = getLookaheadCount(text, start + len, depth + 1);
            if (score > best)
                best = score;
        }
        --len;
    }

    return best + 1;
}
//---------------------------------------------------------------------- static void process_operand(op_t &x,int isAlt,int isload) { switch ( x.type ) { case o_reg: case o_phrase: case o_reglist: return; case o_imm: QASSERT(10094, isload); process_immediate_number(x.n); if ( op_adds_xrefs(uFlag, x.n) ) ua_add_off_drefs2(x, dr_O, OOFS_IFSIGN|OOFW_IMM); break; case o_displ: process_immediate_number(x.n); if ( isAlt ) break; if ( op_adds_xrefs(uFlag, x.n) ) { ea_t ea = ua_add_off_drefs2(x, isload ? dr_R : dr_W, get_displ_outf(x)); if ( ea != BADADDR ) { ua_dodata2(x.offb, ea, x.dtyp); if ( !isload ) doVar(ea); } } // create stack variables if required if ( may_create_stkvars() && !isDefArg(uFlag, x.n) ) { func_t *pfn = get_func(cmd.ea); if ( pfn != NULL && (issp(x.phrase) || isbp(x.phrase) && (pfn->flags & FUNC_FRAME) != 0) ) { if ( ua_stkvar2(x, x.addr, STKVAR_VALID_SIZE) ) op_stkvar(cmd.ea, x.n); } } break; case o_near: add_code_xref(x, calc_mem(x.addr)); break; case o_mem: case o_memind: { ea_t ea = calc_mem(x.addr); if ( !isEnabled(ea) && find_sym(ea) ) break; // address not here ua_add_dref(x.offb, ea, isload ? dr_R : dr_W); ua_dodata2(x.offb, ea, x.dtyp); if ( x.type == o_memind ) { ssize_t size = get_dtyp_size(x.dtyp); flags_t F = getFlags(ea); if ( (isWord(F) || isDwrd(F)) && (!isDefArg0(F) || isOff0(F)) ) { ea_t target = calc_mem(size == 2 ? get_word(ea) : (get_long(ea) & 0xFFFFFFL)); if ( isEnabled(target) ) add_code_xref(x, target); if ( !isOff0(F) ) set_offset(ea, 0, calc_mem(0)); } break; } if ( !isload ) doVar(ea); } break; default: INTERR(10095); } }
/**
 * Scans the body of an encryption dictionary and fills in `e`.
 *
 * Characters are read from `file` until EOF or until the "<<" / ">>" nesting
 * (starting at depth 1) drops to zero. Each '/' introduces a name that is
 * dispatched on its first letter:
 *   /EncryptMetadata false   -> noted (the field itself is not written here)
 *   /Filter <name>           -> e->s_handler
 *   /Length <int>            -> e->length (only the first occurrence is used)
 *   /O <string>, /U <string> -> e->o_string, e->u_string
 *   /P <int>, /R <int>, /V <int> -> e->permissions, e->revision, e->version
 *
 * Defaults applied afterwards: encryptMetaData = true unless
 * "/EncryptMetadata false" was seen, length = 40, version = 0.
 *
 * @return true when a security handler other than "Standard" is named;
 *         otherwise true only if /Filter, /O, /P, /R and /U were all found.
 */
static bool parseEncrypObject(FILE *file, EncData *e) {
  int ch, dict = 1;
  bool fe = false;
  bool ff = false;
  bool fl = false;
  bool fo = false;
  bool fp = false;
  bool fr = false;
  bool fu = false;
  bool fv = false;
  p_str *str = NULL;

  ch = getc(file);
  while(ch != EOF) {
    /* Track dictionary nesting so we stop at the matching ">>". */
    if(ch == '>') {
      ch = getc(file);
      if(ch == '>') {
        dict--;
        if(dict <= 0)
          break;
      }
    }
    else if(ch == '<') {
      ch = getc(file);
      if(ch == '<') {
        dict++;
      }
    }

    if(ch == '/') {
      ch = getc(file);
      switch(ch) {
      case 'E':
        if(isWord(file, "ncryptMetadata")) {
          ungetc(parseWhiteSpace(file), file);
          if(isWord(file, "false"))
            fe = true;
        }
        break;
      case 'F':
        if(isWord(file, "ilter")) {
          char *s_handler = parseName(file);
          if(s_handler != NULL) {
            e->s_handler = s_handler;
            ff = true;
          }
        }
        /* The break used to sit inside the if above, so any other name
           starting with 'F' fell through into the /Length case. */
        break;
      case 'L':
        if(isWord(file, "ength")) {
          int tmp_l = parseIntWithC(file,parseWhiteSpace(file));
          if(!fl) {
            /* BZZZT!!  This is sooo wrong but will work for most cases.
               only use the first length we stumble upon */
            e->length = tmp_l;
          }
          fl = true;
        }
        break;
      case 'O':
        /* `str` is always NULL here: every path below resets it. */
        str = parseRegularString(file);
        if(!str)
          break;
        if(str->len != 32)
          fprintf(stderr, "WARNING: O-String != 32 Bytes: %d\n", str->len);
        e->o_string = str->content;   /* content ownership moves to e */
        free(str);
        str = NULL;
        fo = true;
        break;
      case 'P':
        ch = getc(file);
        if(isWhiteSpace(ch)) {
          ch = parseWhiteSpace(file);
          e->permissions = parseIntWithC(file,ch);
          fp = true;
        }
        break;
      case 'R':
        ch = getc(file);
        if(isWhiteSpace(ch)) {
          ch = parseWhiteSpace(file);
          e->revision = parseIntWithC(file,ch);
          fr = true;
        }
        break;
      case 'U':
        str = parseRegularString(file);
        if(!str)
          break;
        if(str->len != 32)
          fprintf(stderr, "WARNING: U-String != 32 Bytes: %d\n", str->len);
        e->u_string = str->content;   /* content ownership moves to e */
        free(str);
        str = NULL;
        fu = true;
        break;
      case 'V':
        ch = getc(file);
        if(isWhiteSpace(ch)) {
          e->version = parseIntWithC(file, parseWhiteSpace(file));
          fv = true;
        }
        break;
      default:
        break;
      }
    }
    ch = parseWhiteSpace(file);
  }

  if(!fe)
    e->encryptMetaData = true;
  if(!fl)
    e->length = 40;
  if(!fv)
    e->version = 0;

  /* s_handler is only assigned when /Filter was parsed; guard against a NULL
     handler instead of handing it to strcmp.
     NOTE(review): assumes the caller initialises e->s_handler to NULL. */
  if(e->s_handler != NULL && strcmp(e->s_handler,"Standard") != 0)
    return true;

  return ff && fo && fp && fr && fu;
}
/**
 * Searches `file` from its current position for a "trailer" keyword at the
 * start of a line, followed by a "<<" dictionary, and looks inside that
 * dictionary for the /Encrypt and /ID entries.
 *
 * On success e->fileID and e->fileIDLen are set from the parsed ID string
 * (the string content is handed over to `e`) and the integer that followed
 * /Encrypt is returned.
 *
 * @return the value parsed after /Encrypt when both entries were found;
 *         ETRENF when only an ID was found, ETRINF when only /Encrypt was
 *         found, ETRANF when neither was found.
 */
static int findTrailer(FILE *file, EncData *e) {
  int ch;
  bool encrypt = false;
  bool id = false;
  int e_pos = -1;
  p_str *str = NULL;

  ch = getc(file);
  while(ch != EOF) {
    if(isEndOfLine(ch)) {
      if(isWord(file, "trailer")) {
        ch = parseWhiteSpace(file);
        if(ch == '<' && getc(file) == '<') {
          /** we can be pretty sure to have found the trailer.
              start looking for the Encrypt-entry */
          ch = getc(file);
          while(ch != EOF) {
            if(ch == '>') {
              ch = getc(file);
              if(ch == '>')
                break;
            }
            /* Skip ahead to the next name, then read its first letter. */
            while(ch != '/' && ch != EOF) {
              ch = getc(file);
            }
            ch = getc(file);

            if(e_pos < 0 && ch == 'E' && isWord(file, "ncrypt")) {
              e_pos = parseIntWithC(file,parseWhiteSpace(file));
              if(e_pos >= 0) {
                encrypt = true;
              }
            }
            else if(ch == 'I' && getc(file) == 'D') {
              ch = parseWhiteSpace(file);
              while(ch != '[' && ch != EOF)
                ch = getc(file);

              /* Drop an ID parsed earlier before reading a new one. */
              if(str) {
                if(str->content)
                  free(str->content);
                free(str);
                str = NULL;
              }

              str = parseRegularString(file);
              /* Keep the flag in step with `str`: a failed re-parse must not
                 leave `id` set, or the success path below would dereference
                 a NULL `str`. */
              id = (str != NULL);
              ch = getc(file);
            }
            else
              ch = getc(file);

            if(encrypt && id) {
              e->fileID = str->content;
              e->fileIDLen = str->len;
              free(str);
              return e_pos;
            }
          }
        }
      }
      else {
        ch = getc(file);
      }
    }
    else
      ch = getc(file);
  }

  /* Nothing complete was found: release a half-result and report what is missing. */
  if(str) {
    if(str->content)
      free(str->content);
    free(str);
  }

  if(!encrypt && id)
    return ETRENF;
  else if(!id && encrypt)
    return ETRINF;
  else
    return ETRANF;
}
bool QsCodeParser::rightWordBoundary(const QString& str, int pos) { return isWord(str[pos - 1]) && !isWord(str[pos]); }
/////////////////////////////////////////////////////////////////// //Creates the stream of tokens for use in stack processing later.// /////////////////////////////////////////////////////////////////// struct token_node_list* create_token_stream(char* input, int num_of_chars){ //Create the token node list struct token_node_list* new_token_list= malloc(sizeof(struct token_node_list)); struct token_node_list* head_of_list = new_token_list; //Make dummy token in order to avoid NULL token_type pointer struct token_node* dummy_head = add_token(new_token_list, NULL,DUMMY_HEAD); new_token_list->head = dummy_head; char char_to_sort = *input; int nested_breaker = 0; int char_num_counter = 0; while(char_num_counter < num_of_chars){ if(char_to_sort == '\000'){ return head_of_list; } //Check to see if word if(isWord(char_to_sort)){ //If so, store the word in its own token size_t size = 8; char* w = checked_malloc(size); size_t word_index = 0; do{ w[word_index] = char_to_sort; word_index++; if(word_index == size){ size = size*2; w = checked_grow_alloc(w, &size); } char_num_counter++; //increment index input++; //increment stream pointer char_to_sort = *input; }while(isWord(char_to_sort) && char_num_counter < num_of_chars); new_token_list->cur_node = add_token(new_token_list, w, WORD); } //Handle useless white space else if(char_to_sort == ' ' || char_to_sort == '\t'){ char_num_counter++; //increment index input++; //increment stream pointer char_to_sort = *input; } //Check for subshell else if( char_to_sort == '('){ //TODO int num_pairs = 1; int open_pars = 1; int close_pars = 0; int parens_valid = 0; int index = 0; int buf_size = 10; //random low num for most purposes char *ss_buf = malloc(buf_size* sizeof(char)); if(ss_buf == NULL) fprintf(stderr, "\n Error allocating memory for subshell parse.\n"); while(num_pairs>0){ char_num_counter++; //increment index input++; //increment stream pointer char_to_sort = *input; if(char_num_counter == num_of_chars){ //gone through all 
chars fprintf(stderr, "\nAll characters used.\n"); return NULL; } else if(char_to_sort == '('){ open_pars++; num_pairs++; } else if(char_to_sort == ')'){ //Decrease the number of pairs. close_pars++; num_pairs--; parens_valid = open_pars-close_pars; if(num_pairs == 0 && parens_valid == 0){ char_num_counter++; input++; char_to_sort = *input; break; } } else if(char_to_sort == '\n'){ if(input[1] == '\000'){ error(6, 0, "input[1] is null, so nothing after the newline."); } while(input[1] != ' ' || input[1] != '\t' || input[1] != '\n'){ //Eliminate useless characters input++; char_to_sort++; } //Spec says to substitute semicolon for \n char_to_sort = ';'; } ss_buf[index] = char_to_sort; index++; if(index == buf_size){ buf_size+=2; ss_buf = realloc(ss_buf, buf_size*sizeof(char)); if(ss_buf == NULL){ fprintf(stderr, "\n Error reallocating memory for subshell buffer.\n"); return NULL; } } if( num_pairs == 0 && parens_valid == 0) break; else if(num_pairs == 0 && parens_valid != 0){ error(2, 0, "\n Mismatched parentheses.\n"); return NULL; } } new_token_list->cur_node = add_token(new_token_list, ss_buf, SUBSHELL); } //Check for OR and PIPE else if(char_to_sort == '|'){ char_num_counter++; input++; //increment pointer char_to_sort = *input; //Check to see if the next character is also a pipe, this is an OR if(char_to_sort == '|'){ //Add a token node for OR new_token_list->cur_node = add_token(new_token_list, NULL, OR); char_num_counter++; input++; //increment pointer char_to_sort = *input; //peek at the next character }else new_token_list->cur_node = add_token(new_token_list, NULL, PIPE); } //Check for & and AND, code is same as OR case else if(char_to_sort == '&'){ char_num_counter++; input++; //increment pointer char_to_sort = *input; //peek at the next character if(char_to_sort == '&'){ //This is an and new_token_list->cur_node = add_token(new_token_list, NULL, AND); char_num_counter++; input++; //increment pointer char_to_sort = *input; //peek at the next character }else 
if(char_to_sort != '&'){ error(2,0, "\n Single and...error.\n"); return NULL; } } //Check for left redirect else if(char_to_sort == '<'){ new_token_list->cur_node = add_token(new_token_list, NULL, LEFT_REDIRECT); char_num_counter++; input++; //increment pointer char_to_sort = *input; //peek at the next character } //Check for right redirect else if(char_to_sort == '>'){ new_token_list->cur_node = add_token(new_token_list, NULL, RIGHT_REDIRECT); //hurwitz char_num_counter++; input++; //increment pointer char_to_sort = *input; //peek at the next character } //Handle newline else if(char_to_sort == '\n'){ if(*input++ == '\000'){ return head_of_list; break; }else *input--; switch(new_token_list->cur_node->token_type){ case LEFT_REDIRECT: case RIGHT_REDIRECT: error(2, 0, "\n Error in syntax. Redirect before newline.\n"); return NULL; break; case WORD: case SUBSHELL: if(new_token_list->cur_node->token_type != DUMMY_HEAD){ new_token_list->next = malloc(sizeof(struct token_node_list)); if(new_token_list->next == NULL){ fprintf(stderr, "\nError allocating memory for new tree in create_token_stream.\n"); return NULL; } new_token_list = new_token_list->next; if(new_token_list == NULL) fprintf(stderr, "\n new_token_list is NULL in handling newline\n"); new_token_list->head = add_token(new_token_list, NULL, DUMMY_HEAD ); new_token_list->cur_node = new_token_list->head; } break; default: break; }/* if(char_to_sort == ';'){ new_token_list->cur_node = add_token(new_token_list, NULL, SEMICOLON); }*/ char_num_counter++; input++; //increment pointer char_to_sort = *input; //peek at the next character } //Check for semicolon else if(char_to_sort == ';'){ new_token_list->cur_node = add_token(new_token_list, NULL, SEMICOLON); char_num_counter++; //increment index input++; //increment stream pointer char_to_sort = *input; } else{ error(4, 0,"\nCharacter is not a word or a special token.\n"); return NULL; //no character matches } } //Return pointer to the top of the token_stream return 
head_of_list; }
// Prints a C-like declaration line for every member of the structure st to
// the message window, with "char padN[size]" lines for gaps between members.
//
// The type name is derived from the member flags (word -> "unsigned short",
// dword -> "unsigned long", byte -> "char", struct -> the nested structure's
// name, anything else -> "user_type"; enum members use the enum's name).
// Offset members get a '*' modifier and members larger than one element of
// their data type get an "[n]" suffix.
void enum_members2(struc_t *st)
{
  if (st == NULL)
    return;

  char buf[MAXSTR];
  int gap_cnt = 1;
  asize_t ofs = 0;      // offset at which the next member is expected

  for (size_t i=0;i<st->memqty;i++)
  {
    member_t &mem = st->members[i];

    // unexpected beginning of member? then there is a gap before it
    if (mem.soff != ofs)
    {
      // Cast: passing an asize_t where "%d" expects an int is undefined.
      msg("char pad%d[%d]\n", gap_cnt, (int)(mem.soff - ofs));
      gap_cnt++;
    }

    typeinfo_t mem_ti;
    retrieve_member_info(&mem, &mem_ti);

    // data type size of member
    asize_t dt_mem_size = get_data_type_size(mem.flag, &mem_ti);

    // member size
    asize_t mem_size = get_member_size(&mem);

    // get the member's name
    get_member_name(mem.id, buf, sizeof(buf));

    char dtype[MAXSTR];
    char arraystr[MAXSTR];
    char typemod[10];

    arraystr[0] = 0;
    typemod[0] = 0;

    if (isWord(mem.flag))
      strcpy(dtype, "unsigned short");
    else if (isDwrd(mem.flag))
      strcpy(dtype, "unsigned long");
    else if (isByte(mem.flag))
      strcpy(dtype, "char");
    else if (isStruct(mem.flag))
    {
      struc_t *nested_st = get_sptr(&mem);
      if (nested_st != NULL)
        get_struc_name(nested_st->id, dtype, MAXSTR);
      else
        strcpy(dtype, "user_type");   // nested structure could not be resolved
    }
    else
      strcpy(dtype, "user_type");

    if (isOff0(mem.flag))
    {
      strcpy(typemod, "*");
    }

    if (isEnum0(mem.flag))
    {
      get_enum_name(mem_ti.ec.tid, dtype, sizeof(dtype));
    }

    // an array? (a zero element size cannot form an array; avoid dividing by it)
    asize_t ar_size = dt_mem_size != 0 ? mem_size / dt_mem_size : 0;
    if (ar_size > 1)
    {
      sprintf(arraystr, "[%d]", (int)ar_size);
    }

    // Print straight through msg(): the former 100-byte staging buffer could
    // be overrun by the MAXSTR-sized type and member names.
    msg("%s "  /* type */
        "%s"   /* typemodif */
        "%s"   /* varname */
        "%s"   /* array */
        ";\n",
        dtype, typemod, buf, arraystr);

    // we expect next member to begin @ end of last member
    ofs = mem.eoff;
  }
}
/*
	Runs the lexer over `length` characters starting at `d`.

	For each position the function:
	  1. tries the plain-token tree of the current context
	     (lexer->context->tree), unless both the previous and the current
	     character are word characters;
	  2. falls back to the NFA branches of the current context
	     (lexer->context->out.branch).

	Every match is reported as notify(position, length, action). Matches that
	begin or end a context push/pop lexer->parents and replace
	lexer->context, so `lexer` carries the context state out of this call.
	After the loop, pending buffered notifications are flushed and contexts
	flagged StayOnLine are removed from the stack.

	lexer  : matching state; nothing happens if it or its context is null
	d      : first character to scan
	length : number of characters to scan
	notify : notifier, taken by value

	NOTE(review): the meaning of the 0x80000000 bit OR'ed into context action
	ids is not visible here -- confirm against the notifier implementation.
*/
void match(QNFAMatchContext *lexer, const QChar *d, int length, QNFAMatchNotifier notify)
{
	if ( !lexer || !lexer->context )
	{
		//qWarning("get off you scum!");
		return;
	}
	
	// restore message buffering
	
	notify.clear();
	
	// olvls : context depth on entry
	// nlvls : contexts opened during this call and not yet closed
	// lvls  : entry depth minus pre-existing contexts closed during this call
	int olvls = lexer->parents.count(),
		nlvls = 0,
		lvls = olvls;
	
	if ( lvls )
		notify.startBuffering();
	
	//
	
	quint16 c = 0;
	const QChar *di = d;	// cursor; kept in step with `index`
	QNFA *chain = 0, *start = 0;
	int index = 0, lastCxt = 0, len, idx;
	bool bFound, bEscape = false, bEscaped = false;
	bool wPrev = false, wCur = false;
	
	while ( index < length )
	{
		bFound = false;
		bEscaped = false;
		//bEscape &= !lexer->meaningless.contains(d[index].unicode());
		
		//while ( lexer->meaningless.contains(d[index].unicode()) && ((index + 1) < length) )
		//	++index;
		
		if ( index >= length )
			break;
		
		c = di->unicode();
		
		wCur = isWord(*di);
		
		// plainIndex != -1 marks a plain match held back so that the
		// regexp-like rules get a chance first (see below)
		int plainIndex = -1, plainMatch, plainLength;
		
		// try fast plain matching
		if ( !(wPrev && wCur) )
		{
			//qDebug("trying plain...");
			
			//len = 0;
			idx = index;
			// note: the local `match` iterator hides the match() functions
			// inside this block only
			QCharTree::const_iterator it, match, end;
			
			it = lexer->context->tree.constFind(c);
			
			if ( it != lexer->context->tree.constEnd() )
			{
				//qDebug("plain on %c", c);
				do
				{
					++di;
					++idx;
					
					end = it->next.constEnd();
					// key 0 is looked up as the "a token ends here" entry
					match = it->next.constFind(0);
					
					if ( idx < length )
					{
						c = di->unicode();
						it = it->next.constFind(c);
					} else {
						it = end;
					}
					
					if ( it == end )
					{
						// NOTE(review): when idx == length, *di reads d[length];
						// presumably the caller guarantees that element is
						// readable (e.g. a string terminator) -- confirm.
						if ( (match != end) && !isWord(*di) )
						{
							//word boundary found
							// corresponding token end found
							wPrev = isWord(*(di - 1));
							bFound = true;
							
							if ( match->value.action & 0x40000000 )
							{
								// try regexps before notifying
								plainIndex = index;
								plainLength = idx - index;
								plainMatch = match->value.action;
								//qDebug("ambiguity.");
							} else {
								notify(index, idx - index, match->value.action);
								index = idx;
							}
							//qDebug("next step : %c", d[index].toLatin1());
							//bMonitor = true;
						}
						
						break;
					}
				} while ( idx < length ) ;
				
				if ( bFound )
				{
					bEscape = false;
					
					// match already notified: resume scanning right after it
					if ( plainIndex == -1 )
						continue;
					
					// held-back match: fall through to the NFA attempt
					bFound = false;
				}
				
				// rewind the cursor to the start of the attempted token
				di -= idx - index;
			}
		}
		
		// fallback on regexp-like NFA-based matching
		QNFABranch* children = lexer->context->out.branch;
		
		if ( children )
		{
			//qDebug("trying %i sub nfas on %c", children->count(), d[index].toLatin1());
			int max = children->count();
			
			for ( quint16 i = 0; i < max; ++i )
			{
				len = 0;
				idx = index;
				start = chain = children->at(i);
				
				//qDebug("%ith attempt on %c", i, d[index + len].toLatin1());
				
				while ( (idx < length) || (chain->type & Match) )
				{
					bEscaped = false;
					
					if ( chain->type & Match )
					{
						// a WordEnd assertion fails when the match would stop
						// in the middle of a word
						if (
								(chain->assertion & WordEnd)
							&&
								(idx < length)
							&&
								isWord(*di)
							&&
								isWord(*(di - 1))
							)
						{
							//qDebug("end assertion failed...");
							break;
						}
						
						//qDebug("matched to end");
						
						if ( chain->type & CxtBeg )
						{
							//qDebug("entering context : 0x%x", chain);
							
							++nlvls;
							
							bool notifySub = notify.bufferLevel();
							
							if ( notifySub )
							{
								// pop one message buffer
								notify.stopBuffering();
							}
							
							// notify content of previous context until nest
							notify(lastCxt, index - lastCxt, lexer->context->actionid | 0x80000000);
							
							if ( notifySub )
							{
								// notify sub matches so far to avoid tricky handling later on
								notify.flush();
								
								//notify.startBuffering();
							}
							
							// notify begin marker
							notify(index, len, start->actionid ? start->actionid : chain->actionid);
							
							// update context stack
							lexer->parents.push(lexer->context);
							lexer->context = chain;
							
							// update nest index
							lastCxt = idx;
							
							// push a message buffer
							notify.startBuffering();
							
						} else if ( chain->type & CxtEnd ) {
							//qDebug("leaving context :");
							
							if ( lexer->parents.isEmpty() )
								qFatal("context nesting problem");
							
							if ( bEscape )
							{
								// not really end : escape found...
								
								bEscape = false;
								bEscaped = true;
							} else {
								
								// close a context opened in this call if there
								// is one, otherwise one that existed on entry
								if ( nlvls )
									--nlvls;
								else
									--lvls;
								
								// pop one message buffer
								notify.stopBuffering();
								
								// notify context content from last nest
								notify(lastCxt, index - lastCxt, lexer->context->actionid | 0x80000000);
								
								// flush sub matches
								notify.flush();
								
								// update context stack
								lexer->context = lexer->parents.pop();
								
								if ( lexer->parents.count() )
									notify.startBuffering();
								
								// update nest index
								lastCxt = idx;
								
								// notify end marker
								notify(index, len, chain->actionid);
								
								//qDebug("cxt notif...");
								
								if ( chain->type & Exclusive )
									index = idx;
								
								// compensate for the ++index / ++di at the
								// bottom of the outer loop
								--index;
								--di;
								
								bFound = true;
								break;
							}
						} else if ( chain->type & CxtEsc ) {
							//qDebug("matched %s", qPrintable(QString(index, len)));
							
							//notify(index, len, chain->actionid);
							bEscape = !bEscape;
						} else {
							//qDebug("matched %s", qPrintable(QString(d + index, len)));
							
							// a held-back plain match at least as long as this
							// one takes precedence
							if ( plainIndex != -1 && plainLength >= len )
							{
								break;
							}
							
							notify(index, len, chain->actionid);
							bEscape = false;
						}
						
						bFound = true;
						index = idx;
						// compensate for the ++index / ++di at the bottom of
						// the outer loop
						--index;
						--di;
						
						//qDebug("next step : %c", d[index + 1].toLatin1());
						//bMonitor = true;
						
						break;
					} else {
						// "regular" nfa match (no match yet...)
						
						// a WordStart assertion fails when the match would
						// begin in the middle of a word
						if (
								(chain->assertion & WordStart)
							&&
								(idx >= 1)
							&&
								(
									isWord(*(di - 1))
								&&
									isWord(*di)
								)
							)
						{
							//qDebug("beg assertion failed...");
							
							break;
						}
						
						QChar cc = *di;
						bool found = match(cc, chain);
						
						// a mismatch is only fatal for states that are not optional
						if (
								!(chain->assertion & ZeroOrOne)
							&&
								!(chain->assertion & ZeroOrMore)
							&&
								!found
							)
						{
							//if ( cc.toLatin1() == ')' )
							//	qDebug("mismatch : %c != %c", cc.toLatin1(), chain->c.at(0));
							
							break;
						}
						
						if ( found )
						{
							//qDebug("%c", d[index + len].toLatin1());
							
							if (
									(chain->assertion & OneOrMore)
								||
									(chain->assertion & ZeroOrMore)
								)
							{
								// repeating state: consume as many characters as match
								do
								{
									++di;
									++len;
									++idx;
								} while (
											(idx < length)
										&&
											match(*di, chain)
										);
								
							} else {
								++len;
								++idx;
								++di;
							}
							
						} else {
							//qDebug("! %c", d[index + len].toLatin1());
						}
						
						chain = chain->out.next;
					}
				}
				
				if ( bFound )
					break;
				
				// this branch failed: rewind the cursor before trying the next
				di -= len;
			}
		}
		
		if ( !bFound )
		{
			// no NFA match: emit the held-back plain match, if any
			if ( plainIndex != -1 )
			{
				notify(plainIndex, plainLength, plainMatch);
				index = plainIndex + plainLength;
				di += plainLength;
				continue;
			}
			
			bEscape = false;
			//++index;
			wPrev = wCur;
		} else {
			wPrev = isWord(*di);
		}
		
		++index;
		++di;
	}
	
	// flush messages
	
	if ( !notify.bufferLevel() )
		return;
	
	//qDebug("%i context nests", notify.bufferLevel());
	//qDebug("[%i;+00[ : 0x%x", lastCxt, lexer->context->actionid | 0x80000000);
	
	// pop down one buffer
	notify.stopBuffering();
	
	// notify overlapping context so far
	notify(lastCxt, length - lastCxt, lexer->context->actionid | 0x80000000);
	
	// notify sub matches
	notify.flush();
	
	// make sure we leave a blank notifier...
	notify.clear();
	
	// preserve escape power...
	if ( bEscaped )
		return;
	
	// some existing left AND new one(s)
	if ( (olvls == lvls) && nlvls )
		++lvls;
	
	// close stay-on-line contexts, if any
	QStack<QNFA*>::iterator it = lexer->parents.begin() + lvls;
	
	while ( it != lexer->parents.end() )
	{
		if ( (*it)->type & StayOnLine )
		{
			//qDebug("staid...");
			it = lexer->parents.erase(it);
		} else {
			++it;
		}
	}
	
	if ( (lexer->context->type & StayOnLine) && nlvls && lexer->parents.count() )
		lexer->context = lexer->parents.pop();
	
}
// ----------------------------------------------------------------------------- // Reads in a text definition of a language. See slade.pk3 for // formatting examples // ----------------------------------------------------------------------------- bool TextLanguage::readLanguageDefinition(MemChunk& mc, string_view source) { Tokenizer tz; // Open the given text data if (!tz.openMem(mc, source)) { Log::warning("Unable to open language definition {}", source); return false; } // Parse the definition text ParseTreeNode root; if (!root.parse(tz)) return false; // Get parsed data for (unsigned a = 0; a < root.nChildren(); a++) { auto node = root.childPTN(a); // Create language auto lang = new TextLanguage(node->name()); // Check for inheritance if (!node->inherit().empty()) { auto inherit = fromId(node->inherit()); if (inherit) inherit->copyTo(lang); else Log::warning("Warning: Language {} inherits from undefined language {}", node->name(), node->inherit()); } // Parse language info for (unsigned c = 0; c < node->nChildren(); c++) { auto child = node->childPTN(c); auto pn_lower = StrUtil::lower(child->name()); // Language name if (pn_lower == "name") lang->setName(child->stringValue()); // Comment begin else if (pn_lower == "comment_begin") { lang->setCommentBeginList(child->stringValues()); } // Comment end else if (pn_lower == "comment_end") { lang->setCommentEndList(child->stringValues()); } // Line comment else if (pn_lower == "comment_line") { lang->setLineCommentList(child->stringValues()); } // Preprocessor else if (pn_lower == "preprocessor") lang->setPreprocessor(child->stringValue()); // Case sensitive else if (pn_lower == "case_sensitive") lang->setCaseSensitive(child->boolValue()); // Doc comment else if (pn_lower == "comment_doc") lang->setDocComment(child->stringValue()); // Keyword lookup link else if (pn_lower == "keyword_link") lang->word_lists_[WordType::Keyword].lookup_url = child->stringValue(); // Constant lookup link else if (pn_lower == 
"constant_link") lang->word_lists_[WordType::Constant].lookup_url = child->stringValue(); // Function lookup link else if (pn_lower == "function_link") lang->f_lookup_url_ = child->stringValue(); // Jump blocks else if (pn_lower == "blocks") { for (unsigned v = 0; v < child->nValues(); v++) lang->jump_blocks_.push_back(child->stringValue(v)); } else if (pn_lower == "blocks_ignore") { for (unsigned v = 0; v < child->nValues(); v++) lang->jb_ignore_.push_back(child->stringValue(v)); } // Block begin else if (pn_lower == "block_begin") lang->block_begin_ = child->stringValue(); // Block end else if (pn_lower == "block_end") lang->block_end_ = child->stringValue(); // Preprocessor block begin else if (pn_lower == "pp_block_begin") { for (unsigned v = 0; v < child->nValues(); v++) lang->pp_block_begin_.push_back(child->stringValue(v)); } // Preprocessor block end else if (pn_lower == "pp_block_end") { for (unsigned v = 0; v < child->nValues(); v++) lang->pp_block_end_.push_back(child->stringValue(v)); } // Word block begin else if (pn_lower == "word_block_begin") { for (unsigned v = 0; v < child->nValues(); v++) lang->word_block_begin_.push_back(child->stringValue(v)); } // Word block end else if (pn_lower == "word_block_end") { for (unsigned v = 0; v < child->nValues(); v++) lang->word_block_end_.push_back(child->stringValue(v)); } // Keywords else if (pn_lower == "keywords") { // Go through values for (unsigned v = 0; v < child->nValues(); v++) { auto val = child->stringValue(v); // Check for '$override' if (StrUtil::equalCI(val, "$override")) { // Clear any inherited keywords lang->clearWordList(WordType::Keyword); } // Not a special symbol, add as keyword else lang->addWord(WordType::Keyword, val); } } // Constants else if (pn_lower == "constants") { // Go through values for (unsigned v = 0; v < child->nValues(); v++) { auto val = child->stringValue(v); // Check for '$override' if (StrUtil::equalCI(val, "$override")) { // Clear any inherited constants 
lang->clearWordList(WordType::Constant); } // Not a special symbol, add as constant else lang->addWord(WordType::Constant, val); } } // Types else if (pn_lower == "types") { // Go through values for (unsigned v = 0; v < child->nValues(); v++) { auto val = child->stringValue(v); // Check for '$override' if (StrUtil::equalCI(val, "$override")) { // Clear any inherited constants lang->clearWordList(WordType::Type); } // Not a special symbol, add as constant else lang->addWord(WordType::Type, val); } } // Properties else if (pn_lower == "properties") { // Go through values for (unsigned v = 0; v < child->nValues(); v++) { auto val = child->stringValue(v); // Check for '$override' if (StrUtil::equalCI(val, "$override")) { // Clear any inherited constants lang->clearWordList(WordType::Property); } // Not a special symbol, add as constant else lang->addWord(WordType::Property, val); } } // Functions else if (pn_lower == "functions") { bool lang_has_void = lang->isWord(Keyword, "void") || lang->isWord(Type, "void"); if (lang->id_ != "zscript") { // Go through children (functions) for (unsigned f = 0; f < child->nChildren(); f++) { auto child_func = child->childPTN(f); string params; // Simple definition if (child_func->nChildren() == 0) { if (child_func->stringValue(0).empty()) { if (lang_has_void) params = "void"; else params = ""; } else { params = child_func->stringValue(0); } // Add function lang->addFunction( child_func->name(), params, "", "", !StrUtil::contains(child_func->name(), '.'), child_func->type()); // Add args for (unsigned v = 1; v < child_func->nValues(); v++) lang->addFunction(child_func->name(), child_func->stringValue(v)); } // Full definition else { string name = child_func->name(); vector<string> args; string desc; string deprecated; for (unsigned p = 0; p < child_func->nChildren(); p++) { auto child_prop = child_func->childPTN(p); if (child_prop->name() == "args") { for (unsigned v = 0; v < child_prop->nValues(); v++) 
args.push_back(child_prop->stringValue(v)); } else if (child_prop->name() == "description") desc = child_prop->stringValue(); else if (child_prop->name() == "deprecated") deprecated = child_prop->stringValue(); } if (args.empty() && lang_has_void) args.emplace_back("void"); for (unsigned as = 0; as < args.size(); as++) lang->addFunction(name, args[as], desc, deprecated, as == 0, child_func->type()); } } } // ZScript function info which cannot be parsed from (g)zdoom.pk3 else { ZFuncExProp ex_prop; for (unsigned f = 0; f < child->nChildren(); f++) { auto child_func = child->childPTN(f); for (unsigned p = 0; p < child_func->nChildren(); ++p) { auto child_prop = child_func->childPTN(p); if (child_prop->name() == "description") ex_prop.description = child_prop->stringValue(); else if (child_prop->name() == "deprecated_f") ex_prop.deprecated_f = child_prop->stringValue(); } lang->zfuncs_ex_props_.emplace(child_func->name(), ex_prop); } } } } } return true; }
/*
    Returns a copy of source rewritten for the Qt Script documentation:

    - at a left word boundary, a leading 'Q' is dropped from identifiers
      listed in classesWithNoQ, and "QCString" loses its first two letters;
    - the words TRUE and FALSE become \c{true} and \c{false};
    - the word "const " is removed;
    - "::" and "->" become ".";
    - the text between the line containing \code and \endcode is run through
      cpp2qs.convertedCode();
    - finally each pattern in replaceBefores is replaced by the entry of
      replaceAfters at the same position.

    Every look-ahead is bounds-checked so that a source ending in 'Q', ':',
    '-', an identifier, or an unterminated \code line is handled without
    indexing past the end of the string.
*/
QString QsCodeParser::quickifiedDoc(const QString& source)
{
    QString result;
    const int length = (int) source.length();
    int i = 0;

    while (i < length) {
        if (leftWordBoundary(source, i)) {
            if (source[i] == 'Q') {
                if (i + 1 < length && source[i + 1] == 'C'
                        && source.mid(i, 8) == "QCString") {
                    i += 2;
                } else {
                    // Find the end of the identifier that follows the 'Q'.
                    int end = i + 1;
                    while (end < length && isWord(source[end]))
                        ++end;
                    if (!classesWithNoQ.contains(
                                  source.mid(i + 1, end - (i + 1))))
                        result += "Q";
                    i++;
                }
            } else if (source[i] == 'T' && source.mid(i, 4) == "TRUE"
                        && rightWordBoundary(source, i + 4)) {
                result += "\\c{true}";
                i += 4;
            } else if (source[i] == 'F' && source.mid(i, 5) == "FALSE"
                        && rightWordBoundary(source, i + 5)) {
                result += "\\c{false}";
                i += 5;
            } else if (source[i] == 'c' && source.mid(i, 6) == "const ") {
                i += 6;
            } else {
                result += source[i++];
            }
        } else if (i + 1 < length
                    && ((source[i] == ':' && source[i + 1] == ':')
                        || (source[i] == '-' && source[i + 1] == '>'))) {
            result += '.';
            i += 2;
        } else if (source[i] == '\\') {
            // ### make independent of the command name
            if (source.mid(i, 5) == "\\code") {
                // Copy the rest of the \code line verbatim; stop at the end
                // of the string if the line has no terminating newline.
                do {
                    result += source[i++];
                } while (i < length && source[i - 1] != '\n');

                int begin = i;
                int end = source.indexOf("\\endcode", i);
                if (end != -1) {
                    QString code = source.mid(begin, end - begin);
                    result += cpp2qs.convertedCode(qsTre, code,
                                                   classesWithNoQ);
                    i = end;
                }
            } else {
                result += source[i++];
            }
        } else {
            result += source[i++];
        }
    }

    // Walk both lists in step; stop at the shorter one.
    QList<QRegExp>::ConstIterator b = replaceBefores.begin();
    QStringList::ConstIterator a = replaceAfters.begin();
    while (a != replaceAfters.end() && b != replaceBefores.end()) {
        result.replace(*b, *a);
        ++b;
        ++a;
    }
    return result;
}
/// Convenience overload: passes the string's character data on to the
/// isWord() overload chosen for a const char* argument.
inline bool isWord(const std::string& word)
{
    const char* const chars = word.c_str();
    return isWord(chars);
}
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::dom // [ <isUUID> | <isDigit>+ . ] <isIdentifier> [ . <isIdentifier> ]* // // Configuration: // rc.<name> // // System: // context.program // context.args // context.width // context.height // system.version // system.os // // Relative or absolute attribute: // <attribute> // <id>.<attribute> // <uuid>.<attribute> // // Single tag: // tags.<word> // // Date type: // <date>.year // <date>.month // <date>.day // <date>.week // <date>.weekday // <date>.julian // <date>.hour // <date>.minute // <date>.second // // Annotations (entry is a date): // annotations.<N>.entry // annotations.<N>.description // bool Lexer::isDOM (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string partialToken; Lexer::Type partialType; if (isLiteral ("rc.", false, false) && isWord (partialToken, partialType)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else _cursor = marker; if (isOneOf ({"context.program", "context.args", "context.width", "context.height", "system.version", "system.os"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } // Optional: // <uuid>. // <id>. std::string extractedToken; Lexer::Type extractedType; if (isUUID (extractedToken, extractedType, false) || isInteger (extractedToken, extractedType)) { if (! isLiteral (".", false, false)) { _cursor = marker; return false; } } // Any failure after this line should rollback to the checkpoint. 
std::size_t checkpoint = _cursor; // [prefix]tags.<word> if (isLiteral ("tags", false, false) && isLiteral (".", false, false) && isWord (partialToken, partialType)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else _cursor = checkpoint; // [prefix]attribute if (isOneOf (attributes, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } // [prefix]attribute. if (isOneOf (attributes, false, false)) { if (isLiteral (".", false, false)) { std::string attribute = _text.substr (checkpoint, _cursor - checkpoint - 1); // if attribute type is 'date', then it has sub-elements. if (attributes[attribute] == "date" && isOneOf ({"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } else { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } // [prefix]annotations. if (isLiteral ("annotations", true, false) && isLiteral (".", false, false)) { std::string extractedToken; Lexer::Type extractedType; if (isInteger (extractedToken, extractedType)) { if (isLiteral (".", false, false)) { if (isLiteral ("description", false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else if (isLiteral ("entry", false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else if (isLiteral ("entry", false, false) && isLiteral (".", false, false) && isOneOf ({"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } } } _cursor = marker; return false; }