/*
 * Re-tokenize the accumulated text and splice the resulting tokens in front
 * of the processor's pending token list.
 *
 * If the processor still has pending tokens, a single space and the text of
 * the first pending token are joined onto *accumulator, and that token is
 * removed from the front of the list before tokenizing.
 *
 * p           - alias processor; its tokens, line, processed, prev_processed
 *               and ast fields are read here.
 * accumulator - in/out string; replaced by the joined string when a pending
 *               token is consumed.
 */
static void extend_tokens(t_alias_processor *p, char **accumulator)
{
	t_tokenizer *tkz;
	t_lst *fresh;

	if (twl_lst_len(p->tokens) != 0)
	{
		/* NOTE(review): the 'l' flag presumably frees the left operand - confirm. */
		*accumulator = twl_strjoinfree(*accumulator, " ", 'l');
		*accumulator = twl_strjoinfree(*accumulator,
				token_mgr_first(p->tokens)->text, 'l');
		twl_lst_pop_front(p->tokens);
	}

	/* Tokenize the accumulated text, reporting the processor's line number. */
	tkz = tokenizer_new(*accumulator);
	tkz->cur_line = p->line;
	fresh = tokenizer_tokenize(tkz);

	/* Mark the new tokens against both the current and the previous set. */
	token_mgr_mark_is_alias_expan(fresh, p->processed);
	token_mgr_mark_is_alias_expan(fresh, p->prev_processed);

	twl_lst_extend_front(p->tokens, fresh);
	if (p->ast != NULL)
	{
		twl_lst_extend(p->ast->tokens_ref_tracker, fresh);
	}

	tokenizer_del(tkz);
	/* Only the list container is deleted; no element destructor is passed. */
	twl_lst_del(fresh, NULL);
}
/*
 * Build a Phrase from a string.
 *
 * When the string contains a '/', it is split on every '/' and each piece is
 * appended with phrase_append(). Otherwise the string is run through a
 * Tokenizer and every term it yields is appended with phrase_append_nocopy().
 *
 * Returns the new Phrase, or NULL (via g_return_val_if_fail) when str is NULL.
 */
Phrase *
phrase_from_string (const gchar *str)
{
    Phrase *result;
    Tokenizer *tokens;
    gchar *word;
    gchar **parts;
    gchar **cursor;

    g_return_val_if_fail(str != NULL, NULL);

    result = phrase_new();

    if (index(str, '/') == NULL) {
        /* No separator: let the tokenizer produce the terms. */
        tokens = tokenizer_new(str);
        while ((word = tokenizer_next(tokens)) != NULL) {
            phrase_append_nocopy(result, word);
        }
        tokenizer_free(tokens);
        return result;
    }

    /* Explicit '/'-separated form: each piece becomes one term. */
    parts = g_strsplit(str, "/", 0);
    for (cursor = parts; *cursor != NULL; cursor++) {
        phrase_append(result, *cursor);
    }
    g_strfreev(parts);

    return result;
}
/*
 * Split a ':'-delimited string into a newly copied STRINGLIST.
 *
 * Escaped delimiters are enabled on the tokenizer. The token list produced by
 * the tokenizer is copied with stringlist_copy() before the tokenizer's own
 * tokens and the tokenizer itself are released.
 *
 * Returns NULL when input is NULL, otherwise the copied list.
 */
static STRINGLIST *make_excludes(const char *input)
{
    STRINGLIST *result = NULL;

    if (input != NULL) {
        TOKENIZER *splitter = tokenizer_new();

        tokenizer_set_delimiters(splitter, ":");
        tokenizer_enable_escaped_delimiters(splitter, true);

        result = stringlist_copy(tokenizer_create_tokens(splitter, input));

        tokenizer_free_tokens(splitter);
        tokenizer_delete(splitter);
    }

    return result;
}
t_ast *ast_new_from_string(char *input, int ast_flags, int line) { t_ast *ast; t_lst *tokens; t_tokenizer *tokenizer; print_if_verbose(input, ast_flags); tokenizer = tokenizer_new(input); tokenizer->cur_line = line; tokens = tokenizer_tokenize(tokenizer); ast = ast_new(tokens, ast_flags); twl_lst_iter(tokenizer->tok_open_stack, push_to_ast_open_stack, ast->ast_open_stack); tokenizer_del(tokenizer); token_mgr_del(tokens); return (ast); }
/* reverse polish https://en.wikipedia.org/wiki/Shunting-yard_algorithm */ queue *syard_run(const char *in) { /* init */ stack *s, *arity; queue *q; int len; tokenizer_ctx *tkc; char *tok, *op, *newstr; tokenizer_type tok_last = TOKEN_LBRACKET; char comma = ',', mul = '*'; int *arn; s = stack_new(); arity = stack_new(); q = queue_new(); tkc = tokenizer_new(); newstr = strdup(in); newstr[strcspn(newstr, "\r\n")] = 0; /* strip newlines */ tokenizer_reset(tkc, newstr); /* while there are tokens to be read, read a token. */ while ((tok = tokenizer_next(tkc)) != NULL) { switch (tkc->type) { /* if the token is a number, then push it to the output queue. */ case TOKEN_NUMBER: /* special case: last token was a rbracket and we have number now */ if (tok_last == TOKEN_RBRACKET) { /* push a * sign with high precendence */ stack_push(s, (void *)&mul); } queue_enqueue(q, syard_create_double(tok)); break; /* if the token is an operator, then: */ case TOKEN_OPERATOR: /* special case: last token was left bracket or operator and we have a minus sign now */ if ((tok_last == TOKEN_LBRACKET || tok_last == TOKEN_OPERATOR) && (*tok == '-')) { /* change the operator to the special 'm' operator that we'll deal with in rpn_calc */ *tok = 'm'; } /* while there is an operator at the top of the operator stack with greater than or equal to precedence: */ while (((op = stack_top(s)) != NULL) && operator_is_preceding(*op, *tok)) { /* pop operators from the operator stack, onto the output queue; */ queue_enqueue(q, create_char_data(*(char *)stack_pop(s))); } /* push the read operator onto the operator stack. */ stack_push(s, (void *)tok); break; /* if the token is a left bracket (i.e. 
"("), then: */ case TOKEN_LBRACKET: /* special case: last token was a number or rbracket and we have lbracket now */ if (tok_last == TOKEN_NUMBER || tok_last == TOKEN_RBRACKET) { /* push a * sign with high precendence */ stack_push(s, (void *)&mul); } /* push it onto the operator stack */ stack_push(s, tok); break; /* if the token is a right bracket (i.e. ")"), then: */ case TOKEN_RBRACKET: /* while the operator at the top of the operator stack is not a left bracket: */ while (((op = stack_top(s)) != NULL) && *op != '(') { /* pop operators from the operator stack onto the output queue. */ queue_enqueue(q, create_char_data(*(char *)stack_pop(s))); } /* if the stack runs out without finding a left bracket, then there are mismatched parentheses. */ if (op == NULL || (op != NULL && *op != '(')) { /* mismatched parentheses */ printf("! mismatched parentheses; extra )\n"); goto err_cleanup; } /* pop the left bracket from the stack. */ stack_pop(s); /* check if stack top is a function and if so, pop it */ if (stack_top(s) != NULL && *((char *)stack_top(s)) == '\0') { /* this was a function */ char *ps; void *p; /* pop item from stack */ ps = (char *)stack_pop(s); /* remove leading null */ memmove(ps, ps+1, strlen(ps+1)+1); /* fetch function arity */ arn = stack_pop(arity); /* create function data */ p = create_function_data(*arn, ps); free(ps); free(arn); /* enqueue */ queue_enqueue(q, p); } break; case TOKEN_FUNCTION: /* special case: last token was a number or rbracket and we have variable now */ if (tok_last == TOKEN_NUMBER || tok_last == TOKEN_RBRACKET) { /* push a * sign with high precendence */ stack_push(s, (void *)&mul); } len = strlen(tok); op = calloc(len + 2, sizeof(char)); op[0] = '\0'; op[len] = '\0'; memcpy(op+1, tok, len); stack_push(s, op); arn = malloc(sizeof(int)); *arn = 1; stack_push(arity, arn); break; case TOKEN_COMMA: while (((op = stack_top(s)) != NULL) && *op != ',' && *op != '(') { /* pop operators from the operator stack onto the output queue. 
*/ queue_enqueue(q, create_char_data(*(char *)stack_pop(s))); } if (*op == ',') stack_pop(s); stack_push(s, &comma); arn = stack_pop(arity); (*arn)++; stack_push(arity, arn); break; case TOKEN_VARIABLE: /* special case: last token was a number or rbracket and we have variable now */ if (tok_last == TOKEN_NUMBER || tok_last == TOKEN_RBRACKET) { /* push a * sign with high precendence */ stack_push(s, (void *)&mul); } queue_enqueue(q, create_var_data(tok)); break; default: break; } tok_last = tkc->type; } /* if there are no more tokens to read: */ if (tkc->type == TOKEN_END) { /* while there are still operator tokens on the stack: */ while (((op = stack_top(s)) != NULL) && *op != '(') { /* pop the operator onto the output queue. */ queue_enqueue(q, create_char_data(*(char *)stack_pop(s))); } /* if the operator token on the top of the stack is a bracket, then there are mismatched parentheses. */ if (op != NULL && *op == '(') { printf("! mismatched parentheses; extra (\n"); goto err_cleanup; } } else { printf("! unknown character `%c` in equation\n", *(tkc->pos)); goto err_cleanup; } stack_destroy(s); stack_destroy(arity); tokenizer_destroy(tkc); free(newstr); return q; err_cleanup: stack_foreach(s, syard_string_cleanup, NULL); stack_destroy(s); stack_foreach(arity, syard_queue_cleanup, NULL); stack_destroy(arity); tokenizer_destroy(tkc); queue_foreach(q, syard_queue_cleanup, NULL); queue_destroy(q); free(newstr); return NULL; }
int main (int argc, char **argv) { toptions *opt; tsequence *seq; treadseq *rs = NULL; ttokenizer *tokenizer = NULL; char *command; opt = (toptions *) calloc(1,sizeof(toptions)); init_defaults(opt); process_args(opt, 0, argc, argv); if (!opt->terminate) { if (optind < argc) rs = readseq_open(READSEQ_STRING, argv[optind]); else if (opt->inputfile) rs = readseq_open(READSEQ_FILE, opt->inputfile); else if (!isatty(fileno(stdin))) rs = readseq_open(READSEQ_STDIN, NULL); else { printf("Interactive mode. Try `./RNAfold -h` for more information.\n", argv[0]); rl_init(); opt->interactive = 1; opt->colored_output = 1 - opt->colored_output; tokenizer = tokenizer_new(); rs = readseq_open(READSEQ_STRING, ""); } while (1) { if (opt->interactive) { if (opt->colored_output) printf("%s\nInput sequence (upper or lower case); :q to quit, -h for help.\n....,....1....,....2....,....3....,....4....,....5....,....6....,....7....,....8\n%s",COLOR_RED,COLOR_DEFAULT); else printf("\nInput sequence (upper or lower case); :q to quit, -h for help.\n....,....1....,....2....,....3....,....4....,....5....,....6....,....7....,....8\n"); command = rl_gets(); if (!command || (command[0] == '@') || ((command[0] == ':') && (command[1] == 'q'))) { pcolor(opt->colored_output,COLOR_BLUE); printf("Leaving RNAfold."); pcolor(opt->colored_output,COLOR_DEFAULT); printf("\n"); exit(0); } else if (command[0] == ':') { pcolor(opt->colored_output,COLOR_BLUE); if (command[1] == 's') print_settings(opt); if (command[1] == 'd') { init_defaults(opt); opt->colored_output = 1; opt->interactive = 1; printf("Activated default configuration.\n"); pcolor(opt->colored_output,COLOR_DEFAULT); } if (command[1] == 'e') { system(command + 2); } if (command[1] == 'r') { system("make update"); system("./RNAfold"); exit(0); } } else if (command[0] == '-') { tokenizer_exec(tokenizer, argv[0], command); process_args(opt, 1, tokenizer->count, tokenizer->token); if (opt->inputfile) { rs = readseq_free(rs); rs = 
readseq_open(READSEQ_FILE, opt->inputfile); } free(opt->inputfile); opt->inputfile = NULL; } else { rs = readseq_free(rs); rs = readseq_open(READSEQ_STRING, command); } } while (1) { seq = readseq_next_fasta(rs); if (!(seq->success)) break; if (1) { main_rnafold_mfe(opt, seq); } sequence_free(seq); } if (!opt->interactive) break; } } exit(0); }