/*
 * Record `index` under the key `sorted_word` in the trie rooted at `root`.
 *
 * Each character of the key selects the child slot `c - 'a'` of the current
 * node; missing children are created with new_trie_node().  At the node
 * reached after the last character, a new index node holding `index` is
 * added to that node's `head` list: appended at the tail when the node is
 * already marked (`word > 0`), otherwise installed as the first element.
 *
 * The node is marked (`word = 1`) when its first index is stored.  Without
 * that mark a second insert of the same key would take the "first element"
 * path again and overwrite `head`, dropping the earlier index.
 *
 * NOTE(review): characters are used as array offsets without validation, so
 * keys presumably must consist of 'a'..'z' only -- confirm against the size
 * of `arr` and the callers.
 */
void insert(char *sorted_word, int index, trie_node *root)
{
    /* Walk (and extend) the path spelled by the key. */
    for (; *sorted_word != '\0'; sorted_word++) {
        int slot = *sorted_word - 'a';

        if (root->arr[slot] == NULL)
            root->arr[slot] = new_trie_node();
        root = root->arr[slot];
    }

    if (root->word > 0) {
        /* Key seen before: find the terminating link and hang the new
         * index node there.  Walking links rather than nodes also copes
         * with a marked node whose list is still empty. */
        index_node **link = &root->head;

        while (*link != NULL)
            link = &(*link)->next;
        *link = new_index_node(index);
    } else {
        root->head = new_index_node(index);
        root->word = 1;
    }
}
/*
 * Build a trie from the NULL-terminated array of pattern strings `ps`, then
 * hand the finished trie to compute_fail() and return its root.
 *
 * For every pattern the trie is walked one character at a time through the
 * `succ` table, creating nodes (and recording the character in `ch`) where
 * no successor exists yet.  The node reached by a pattern's last character
 * gets its `pattern` field pointed at that pattern string (the pointer is
 * stored, not a copy).  An empty pattern string creates no node and is
 * therefore not recorded anywhere.
 *
 * Characters are converted to unsigned char before being used as an index:
 * where plain `char` is signed, bytes >= 0x80 would otherwise produce a
 * negative index into `succ`.
 *
 * NOTE(review): this assumes `succ` has an entry for every unsigned char
 * value, and that any other code indexing `succ` by character performs the
 * same conversion -- confirm against the trie_node definition.
 */
trie_node *generate_automaton(char *ps[])
{
    trie_node *root = new_trie_node();

    for (int i = 0; ps[i]; i++) {
        char *p = ps[i];
        trie_node *tmp = root;

        for (int j = 0; p[j]; j++) {
            unsigned char c = (unsigned char)p[j];

            if (!tmp->succ[c]) {
                trie_node *new_node = new_trie_node();

                new_node->ch = p[j];
                tmp->succ[c] = new_node;
            }
            tmp = tmp->succ[c];

            /* Last character of the pattern: mark the accepting node. */
            if (!p[j + 1])
                tmp->pattern = p;
        }
    }

    compute_fail(root);
    return root;
}
/*
 * Replace the global TRIE with a fresh trie populated from `token_types`.
 *
 * The keys of `token_types` are collected into a temporary list (with an
 * empty-string prefix argument); each key is then added to TRIE as a path
 * whose accepting type is the `data` stored under that key in the hash.
 * The temporary key list is destroyed before returning.
 */
void fill_TRIE( struct hash * token_types ) {
    struct list * keys;
    int n;

    TRIE = new_trie_node();

    keys = new_list();
    list_keys_in_hash( token_types, keys, "" );

    n = 0;
    while ( n < keys->next_index ) {
        char * key = listlookup( keys, n );
        char * type = hashlookup( token_types, key )->data;

        add_to_trie( TRIE, key, type );
        n++;
    }

    destroy_key_list( keys );
}
/*
 * Insert `path` into `trie`, one node per character.
 *
 * Starting at `trie`, each character is looked up in the current node's
 * `child` character hash; a missing child is created with new_trie_node()
 * and registered under that character.  The node for the final character
 * of `path` has its `accepting_type` set to the given pointer (stored as
 * is, not copied).  An empty `path` leaves the trie untouched.
 */
void add_to_trie( struct trie_node * trie, char * path, char * accepting_type ) {
    struct trie_node * node = trie;
    char * cursor;

    for ( cursor = path; *cursor != '\0'; cursor++ ) {
        struct charhash * hit = charhashlookup( node->child, *cursor );
        struct trie_node * child;

        if ( hit ) {
            child = hit->data;
        } else {
            child = new_trie_node();
            add_to_charhash( node->child, *cursor, (void *) child );
        }

        if ( cursor[1] == '\0' ) {
            // Last char gets accepting type.
            child->accepting_type = accepting_type;
        }

        node = child;
    }
}
/* Second pass: read the execution records. Maintain a trie that is * indexed by the opcode on the top level and where each sub-level is * a linked list of instructions along with execution counts and their * subtrees. Also maintain an array of current positions in the trie: * for each new instruction we get to, a new entry is added to this * array so that we track the sequence starting at that instruction * too. */ void read_samples(const char* filename) { static unsigned bigbuf[1000000]; FILE *sample_fp; unsigned total_samples, signature, prev_pc; codestruct_t *prev_codestruct; trie_t* tracker[1000]; unsigned operand_value[1000][10]; /* Up to 10 distinct operand values per tracker. We use offset 0..9 + 1; 0 means "no information" */ unsigned next_symbol[1000]; /* Next symbol (= operand slot, if below 10) */ int nexttracker; int trie_nodes = 0; int bigbuflim = 0; int bigbufptr = 0; int despecializations = 0; if (verbose) fprintf(stderr, "%s\n", filename); (sample_fp = fopen(filename, "rb")) != NULL || fail("No such file: %s", filename); fread(&signature, sizeof(unsigned), 1, sample_fp) == 1 || fail("No sample signature"); signature == 0xDA1ADA1A || fail("Bad sample signature: %08x", signature); total_samples = 0; prev_codestruct = NULL; prev_pc = 0; nexttracker = 0; for (;;) { unsigned pc, opcode, ops, op, opcodes[10]; codestruct_t *codestruct; if (bigbufptr == bigbuflim) { int res; res = fread(bigbuf, sizeof(unsigned), sizeof(bigbuf)/sizeof(bigbuf[0]), sample_fp); if (res <= 0) break; bigbufptr = 0; bigbuflim = res; } pc = bigbuf[bigbufptr++]; total_samples++; codestruct = (codestruct_t*)bsearch(&pc, codestructs, nextcodestruct, sizeof(codestruct_t), pc_cmp); codestruct != NULL || fail("Could not find pc=%08x anywhere in code memory.", pc); opcode = codestruct->code[(pc - codestruct->start)/wordsize]; if (opcode > 255) opcode = 256 + (opcode >> 8); /* Despecialize. 
We can do it in the interpreter or here; * doing it here is simpler for the moment, provided we don't * have to adjust branch targets and so on. Despecialization * merely inserts opcodes into a buffer, across which we then * iterate; in the degenerate case, the single opcode is not * despecialized and is just inserted into the buffer alone. * * Right now there's no need to handle argument values. * * Open questions (likely answers are always "no"): * - Should we despecialize IFFALSE as NOT; IFTRUE? * - Should increment and decrement be despecialized as PUSHINT 1; ADD? * - Should inclocal and declocal be despecialized by breaking * it into GETLOCAL; INCREMENT; SETLOCAL? */ ops = 0; switch (opcode) { default: { int n = operands[opcode]; opcodes[ops++] = opcode; if (n > 0) { n--; opcodes[ops++] = codestruct->code[(pc - codestruct->start)/wordsize + 1]; } if (n > 0) { n--; opcodes[ops++] = codestruct->code[(pc - codestruct->start)/wordsize + 2]; } assert(n == 0); break; } case OP_getlocal0: case OP_getlocal1: case OP_getlocal2: case OP_getlocal3: opcodes[ops++] = OP_getlocal; opcodes[ops++] = OP_getlocal - opcode; break; case OP_setlocal0: case OP_setlocal1: case OP_setlocal2: case OP_setlocal3: opcodes[ops++] = OP_setlocal; opcodes[ops++] = OP_setlocal - opcode; break; case OP_iflt: opcodes[ops++] = OP_lessthan; opcodes[ops++] = OP_iftrue; opcodes[ops++] = 0; break; case OP_ifle: opcodes[ops++] = OP_lessequals; opcodes[ops++] = OP_iftrue; opcodes[ops++] = 0; break; case OP_ifnlt: opcodes[ops++] = OP_lessthan; opcodes[ops++] = OP_iffalse; opcodes[ops++] = 0; break; case OP_ifnle: opcodes[ops++] = OP_lessequals; opcodes[ops++] = OP_iffalse; opcodes[ops++] = 0; break; case OP_ifgt: opcodes[ops++] = OP_greaterthan; opcodes[ops++] = OP_iftrue; opcodes[ops++] = 0; break; case OP_ifge: opcodes[ops++] = OP_greaterequals; opcodes[ops++] = OP_iftrue; opcodes[ops++] = 0; break; case OP_ifngt: opcodes[ops++] = OP_greaterthan; opcodes[ops++] = OP_iffalse; opcodes[ops++] = 0; 
break; case OP_ifnge: opcodes[ops++] = OP_greaterequals; opcodes[ops++] = OP_iffalse; opcodes[ops++] = 0; break; case OP_ifeq: opcodes[ops++] = OP_equals; opcodes[ops++] = OP_iftrue; opcodes[ops++] = 0; break; case OP_ifstricteq: opcodes[ops++] = OP_strictequals; opcodes[ops++] = OP_iftrue; opcodes[ops++] = 0; break; case OP_ifne: opcodes[ops++] = OP_equals; opcodes[ops++] = OP_iffalse; opcodes[ops++] = 0; break; case OP_ifstrictne: opcodes[ops++] = OP_strictequals; opcodes[ops++] = OP_iffalse; opcodes[ops++] = 0; break; } if (opcodes[0] != opcode || ops > 1) despecializations++; if (prev_codestruct != codestruct) nexttracker = 0; op = 0; while ( op < ops ) { int i; opcode = opcodes[op]; assert( opcode < INSTRCOUNT ); for ( i=0 ; i < nexttracker ; i++ ) { trie_t *t = tracker[i]; unsigned opd1=0, opd2=0; if (operand_tracking) compute_operands(opcodes, op, &opd1, &opd2, i, operand_value, next_symbol); if (t->left_child == NULL) { t->left_child = new_trie_node(opcode, opd1, opd2); ++trie_nodes; tracker[i] = t->left_child; } else { trie_t *t2 = t->left_child; trie_t *t3 = NULL; if (operand_tracking) { while (t2 != NULL && (t2->opcode != opcode || t2->opd1 != opd1 || t2->opd2 != opd2)) t3 = t2, t2 = t2->right_sibling; } else { while (t2 != NULL && t2->opcode != opcode) t3 = t2, t2 = t2->right_sibling; } if (t2 != NULL) { /* Move the node to the head of the list in order to keep the hottest nodes first */ t2->count++; if (t3 != NULL) { /* Otherwise it's already at the head of the list */ t3->right_sibling = t2->right_sibling; t2->right_sibling = t->left_child; t->left_child = t2; } } else { t2 = new_trie_node(opcode, opd1, opd2); ++trie_nodes; t2->right_sibling = t->left_child; t->left_child = t2; } tracker[i] = t2; } } nexttracker < sizeof(tracker)/sizeof(tracker[0]) || fail("Out of tracker memory, probably a bug"); tracker[nexttracker++] = &toplevel[opcode]; toplevel[opcode].count++; if (operand_tracking) { unsigned opd1, opd2; next_symbol[nexttracker-1] = 0; 
compute_operands(opcodes, op, &opd1, &opd2, nexttracker-1, operand_value, next_symbol); /* Wrong. This breaks down for two-operand instructions at the top level, * as there may be two variants: op[1,1] and op[1,2]. In almost all cases * it will be latter, and it is very unlikely that this bug will cause * actual problems. So ignore it, rather than reengineering it. */ toplevel[opcode].opd1 = opd1; toplevel[opcode].opd2 = opd2; } op += operands[opcode] + 1; if (jumps[opcode]) { assert(op == ops); nexttracker = 0; } } prev_codestruct = codestruct; prev_pc = pc; } fclose(sample_fp); if (verbose) { fprintf(stderr, "Samples processed: %d\n", total_samples); fprintf(stderr, "Despecializations: %d\n", despecializations); fprintf(stderr, "Trie nodes: %d\n", trie_nodes); } }