Пример #1
0
/*
 * Record `index` under the key `sorted_word` in the trie rooted at `root`.
 *
 * One trie level is descended per character of sorted_word, using
 * (character - 'a') as the slot in the node's arr[] and creating a child
 * with new_trie_node() whenever the slot is NULL.  At the node reached when
 * the string ends, a fresh index node holding `index` is stored: appended
 * to the tail of the node's list when node->word > 0, otherwise installed
 * as the list head.
 *
 * NOTE(review): nothing in this function updates node->word, so whether the
 * append branch is ever taken depends on code outside this view - confirm.
 * NOTE(review): characters are not range-checked before indexing arr[];
 * presumably callers only pass lowercase 'a'..'z' - confirm.
 */
void insert(char *sorted_word, int index, trie_node *root)
{
	trie_node *node = root;
	const char *p;

	/* Descend to the node for the full key, creating the path as needed. */
	for(p = sorted_word; *p != '\0'; p++)
	{
		int slot = *p - 'a';

		if(node->arr[slot] == NULL)
			node->arr[slot] = new_trie_node();
		node = node->arr[slot];
	}

	if(node->word > 0)
	{
		/* Existing list: walk to the last element and append. */
		index_node *tail = node->head;

		while(tail->next != NULL)
			tail = tail->next;
		tail->next = new_index_node(index);
		return;
	}

	node->head = new_index_node(index);
}
Пример #2
0
/*
 * Build a trie from a NULL-terminated array of pattern strings, then pass
 * the root to compute_fail() and return it.
 *
 * ps: array of NUL-terminated patterns, terminated by a NULL pointer.  The
 *     strings are not copied: the node reached by the last character of a
 *     pattern stores the caller's pointer in ->pattern, so the strings must
 *     remain valid for as long as the trie is used.  An empty pattern adds
 *     nothing.
 *
 * Returns the root node obtained from new_trie_node().
 *
 * succ[] is indexed through unsigned char so that bytes >= 0x80 never
 * produce a negative index on platforms where plain char is signed.
 * NOTE(review): this assumes succ[] has one slot per unsigned char value
 * - confirm against the trie_node definition.
 */
trie_node *generate_automaton(char *ps[]){
  trie_node *root=new_trie_node();
  for (int i=0;ps[i];i++){
    char *p=ps[i];
    trie_node *tmp=root;
    for (int j=0;p[j];j++){
      unsigned char c=(unsigned char)p[j];
      trie_node *next=tmp->succ[c];
      if (!next){
        /* No edge for this byte yet: create and link the child. */
        next=new_trie_node();
        next->ch=p[j];
        tmp->succ[c]=next;
      }
      /* The node for the final character remembers the whole pattern. */
      if (!p[j+1])
        next->pattern=p;
      tmp=next;
    }
  }
  compute_fail(root);
  return root;
}
Пример #3
0
/*
 * Populate the global TRIE from the entries of token_types.
 *
 * A fresh root is obtained from new_trie_node() and assigned to TRIE (the
 * previous value of TRIE is overwritten, not released here).  The keys of
 * token_types are collected into a temporary list; for each key, the data
 * stored under it in the hash is registered in the trie as that key's
 * accepting type.  The temporary key list is destroyed before returning.
 *
 * NOTE(review): the "" passed to list_keys_in_hash() is presumably a
 * starting prefix - confirm against its definition.
 */
void fill_TRIE( struct hash * token_types ) {
    struct list * keys;
    int idx = 0;

    TRIE = new_trie_node();
    keys = new_list();
    list_keys_in_hash( token_types, keys, "" );

    while ( idx < keys->next_index ) {
        char * key = listlookup( keys, idx );
        char * type = hashlookup( token_types, key )->data;

        add_to_trie( TRIE, key, type );
        idx++;
    }

    destroy_key_list( keys );
}
Пример #4
0
/*
 * Insert `path` into `trie` and tag its final node with `accepting_type`.
 *
 * For each character of path, the child stored under that character in the
 * current node's `child` charhash is followed; a missing child is created
 * with new_trie_node() and registered in the charhash.  The node reached by
 * the last character gets its accepting_type set to the given pointer (the
 * pointer is stored, not copied).  An empty path changes nothing.
 */
void add_to_trie( struct trie_node * trie, char * path, char * accepting_type ) {
    int remaining = strlen( path );
    struct trie_node * node = trie;
    char * cursor = path;

    if ( remaining <= 0 ) {
        return;
    }

    while ( remaining-- > 0 ) {
        char key = *cursor++;
        struct charhash * entry = charhashlookup( node->child, key );
        struct trie_node * child;

        if ( entry ) {
            child = entry->data;
        }
        else {
            child = new_trie_node();
            add_to_charhash( node->child, key, (void *) child );
        }
        node = child;
    }

    /* `node` is now the node for the last character of path. */
    node->accepting_type = accepting_type;
}
Пример #5
0
/* Second pass: read the execution records.  Maintain a trie that is
 * indexed by the opcode on the top level and where each sub-level is
 * a linked list of instructions along with execution counts and their
 * subtrees.  Also maintain an array of current positions in the trie:
 * for each new instruction we get to, a new entry is added to this
 * array so that we track the sequence starting at that instruction
 * too.
 *
 * The file named by 'filename' is opened in binary mode and read as a
 * stream of 'unsigned' words: first the signature 0xDA1ADA1A, then one
 * sampled pc per word.  Each pc is located in 'codestructs' to find its
 * opcode, the opcode is despecialized into one or more generic opcodes,
 * and every active tracker is advanced through the trie for each of
 * them.  All trackers are dropped when a sample belongs to a different
 * codestruct than the previous one, and after any opcode for which
 * jumps[] is set.
 *
 * Failures (file cannot be opened, missing or bad signature, pc not
 * found, tracker array full) are reported through fail().  The code
 * following each check relies on fail() not returning - presumably it
 * terminates the program; confirm against its definition.
 *
 * When 'verbose' is set, the file name and three summary counters are
 * written to stderr.
 */
void read_samples(const char* filename)
{
    static unsigned bigbuf[1000000];
    FILE *sample_fp;
    unsigned total_samples, signature;
    codestruct_t *prev_codestruct;
    trie_t* tracker[1000];
    unsigned operand_value[1000][10];  /* Up to 10 distinct operand values per tracker.  We use offset 0..9 + 1; 0 means "no information" */
    unsigned next_symbol[1000];        /* Next symbol (= operand slot, if below 10) */
    int nexttracker;
    int trie_nodes = 0;
    int bigbuflim = 0;
    int bigbufptr = 0;
    int despecializations = 0;

    if (verbose)
	fprintf(stderr, "%s\n", filename);
    (sample_fp = fopen(filename, "rb")) != NULL || fail("No such file: %s", filename);

    fread(&signature, sizeof(unsigned), 1, sample_fp) == 1 || fail("No sample signature");
    signature == 0xDA1ADA1A || fail("Bad sample signature: %08x", signature);

    total_samples = 0;
    prev_codestruct = NULL;
    nexttracker = 0;
    for (;;) {
	unsigned pc, opcode, ops, op, opcodes[10];
	codestruct_t *codestruct;

	/* Refill the sample buffer when it has been consumed; a short or
	 * failed read of zero items ends the loop.
	 */
	if (bigbufptr == bigbuflim) {
	    int res;
	    res = fread(bigbuf, sizeof(unsigned), sizeof(bigbuf)/sizeof(bigbuf[0]), sample_fp);
	    if (res <= 0)
		break;
	    bigbufptr = 0;
	    bigbuflim = res;
	}
	pc = bigbuf[bigbufptr++];
	total_samples++;
	codestruct = (codestruct_t*)bsearch(&pc, codestructs, nextcodestruct, sizeof(codestruct_t), pc_cmp);
	codestruct != NULL || fail("Could not find pc=%08x anywhere in code memory.", pc);
	opcode = codestruct->code[(pc - codestruct->start)/wordsize];
	/* Values above 255 are folded into the range starting at 256,
	 * keyed by their bits above the low byte.
	 */
	if (opcode > 255)
	    opcode = 256 + (opcode >> 8);

	/* Despecialize.  We can do it in the interpreter or here;
	 * doing it here is simpler for the moment, provided we don't
	 * have to adjust branch targets and so on.  Despecialization
	 * merely inserts opcodes into a buffer, across which we then
	 * iterate; in the degenerate case, the single opcode is not
	 * despecialized and is just inserted into the buffer alone.
	 *
	 * Right now there's no need to handle argument values.
	 *
	 * Open questions (likely answers are always "no"):
	 *  - Should we despecialize IFFALSE as NOT; IFTRUE?
	 *  - Should increment and decrement be despecialized as PUSHINT 1; ADD?
	 *  - Should inclocal and declocal be despecialized by breaking
	 *    it into GETLOCAL; INCREMENT; SETLOCAL?
	 */
	ops = 0;
	switch (opcode) {
	default: {
	    /* Not despecialized: copy the opcode and its (at most two)
	     * operand words as they appear in the code.
	     */
	    int n = operands[opcode];
	    opcodes[ops++] = opcode;
	    if (n > 0) { n--; opcodes[ops++] = codestruct->code[(pc - codestruct->start)/wordsize + 1]; }
	    if (n > 0) { n--; opcodes[ops++] = codestruct->code[(pc - codestruct->start)/wordsize + 2]; }
	    assert(n == 0);
	    break;
	}
	/* NOTE(review): the operand below is OP_getlocal - opcode in
	 * unsigned arithmetic; if the local index 0..3 is wanted,
	 * opcode - OP_getlocal0 may have been intended.  Same for
	 * setlocal.  Left unchanged - confirm.
	 */
	case OP_getlocal0:
	case OP_getlocal1:
	case OP_getlocal2:
	case OP_getlocal3:
	    opcodes[ops++] = OP_getlocal;
	    opcodes[ops++] = OP_getlocal - opcode;
	    break;
	case OP_setlocal0:
	case OP_setlocal1:
	case OP_setlocal2:
	case OP_setlocal3:
	    opcodes[ops++] = OP_setlocal;
	    opcodes[ops++] = OP_setlocal - opcode;
	    break;
	/* Fused compare-and-branch opcodes become a compare followed by
	 * iftrue/iffalse with a zero operand.
	 */
	case OP_iflt:
	    opcodes[ops++] = OP_lessthan;
	    opcodes[ops++] = OP_iftrue;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifle:
	    opcodes[ops++] = OP_lessequals;
	    opcodes[ops++] = OP_iftrue;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifnlt:
	    opcodes[ops++] = OP_lessthan;
	    opcodes[ops++] = OP_iffalse;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifnle:
	    opcodes[ops++] = OP_lessequals;
	    opcodes[ops++] = OP_iffalse;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifgt:
	    opcodes[ops++] = OP_greaterthan;
	    opcodes[ops++] = OP_iftrue;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifge:
	    opcodes[ops++] = OP_greaterequals;
	    opcodes[ops++] = OP_iftrue;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifngt:
	    opcodes[ops++] = OP_greaterthan;
	    opcodes[ops++] = OP_iffalse;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifnge:
	    opcodes[ops++] = OP_greaterequals;
	    opcodes[ops++] = OP_iffalse;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifeq:
	    opcodes[ops++] = OP_equals;
	    opcodes[ops++] = OP_iftrue;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifstricteq:
	    opcodes[ops++] = OP_strictequals;
	    opcodes[ops++] = OP_iftrue;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifne:
	    opcodes[ops++] = OP_equals;
	    opcodes[ops++] = OP_iffalse;
	    opcodes[ops++] = 0;
	    break;
	case OP_ifstrictne:
	    opcodes[ops++] = OP_strictequals;
	    opcodes[ops++] = OP_iffalse;
	    opcodes[ops++] = 0;
	    break;
	}

	if (opcodes[0] != opcode || ops > 1)
	    despecializations++;

	/* Sequences are not tracked across codestruct boundaries. */
	if (prev_codestruct != codestruct)
	    nexttracker = 0;

	/* Walk the buffer one instruction at a time; 'op' indexes the
	 * opcode word of the current instruction.
	 */
	op = 0;
	while ( op < ops ) {
	    int i;
	    opcode = opcodes[op];
	    assert( opcode < INSTRCOUNT );
	    /* Advance every active tracker to the child matching this
	     * instruction, creating the child if it does not exist.
	     */
	    for ( i=0 ; i < nexttracker ; i++ ) {
		trie_t *t = tracker[i];
		unsigned opd1=0, opd2=0;
		if (operand_tracking)
		    compute_operands(opcodes, op, &opd1, &opd2, i, operand_value, next_symbol);
		if (t->left_child == NULL) {
		    t->left_child = new_trie_node(opcode, opd1, opd2);
		    ++trie_nodes;
		    tracker[i] = t->left_child;
		}
		else {
		    /* Search the sibling list; t3 trails t2 by one node. */
		    trie_t *t2 = t->left_child;
		    trie_t *t3 = NULL;
		    if (operand_tracking) {
			while (t2 != NULL && (t2->opcode != opcode || t2->opd1 != opd1 || t2->opd2 != opd2))
			    t3 = t2, t2 = t2->right_sibling;
		    }
		    else {
			while (t2 != NULL && t2->opcode != opcode)
			    t3 = t2, t2 = t2->right_sibling;
		    }
		    if (t2 != NULL) {
			/* Move the node to the head of the list in order to keep the
			   hottest nodes first */
			t2->count++;
			if (t3 != NULL) {  /* Otherwise it's already at the head of the list */
			    t3->right_sibling = t2->right_sibling;
			    t2->right_sibling = t->left_child;
			    t->left_child = t2;
			}
		    }
		    else {
			/* Not found: insert a new node at the head of the list. */
			t2 = new_trie_node(opcode, opd1, opd2);
			++trie_nodes;
			t2->right_sibling = t->left_child;
			t->left_child = t2;
		    }
		    tracker[i] = t2;
		}
	    }
	    /* Start a new tracker at the top-level entry for this opcode. */
	    nexttracker < sizeof(tracker)/sizeof(tracker[0]) || fail("Out of tracker memory, probably a bug");
	    tracker[nexttracker++] = &toplevel[opcode];
	    toplevel[opcode].count++;
	    if (operand_tracking) {
		/* Zero-initialized, as at the other call site, so the values
		 * stored below are defined even if compute_operands leaves
		 * one of them untouched.
		 */
		unsigned opd1=0, opd2=0;
		next_symbol[nexttracker-1] = 0;
		compute_operands(opcodes, op, &opd1, &opd2, nexttracker-1, operand_value, next_symbol);
		/* Wrong.  This breaks down for two-operand instructions at the top level,
		 * as there may be two variants: op[1,1] and op[1,2].  In almost all cases
		 * it will be latter, and it is very unlikely that this bug will cause 
		 * actual problems.  So ignore it, rather than reengineering it.
		 */
		toplevel[opcode].opd1 = opd1;
		toplevel[opcode].opd2 = opd2;
	    }

	    /* Skip over this instruction's operand words. */
	    op += operands[opcode] + 1;

	    /* A jump ends every sequence currently being tracked. */
	    if (jumps[opcode]) {
		assert(op == ops);
		nexttracker = 0;
	    }
	}
	prev_codestruct = codestruct;
    }

    fclose(sample_fp);

    if (verbose) {
	/* total_samples is unsigned, hence %u. */
	fprintf(stderr, "Samples processed: %u\n", total_samples);
	fprintf(stderr, "Despecializations: %d\n", despecializations);
	fprintf(stderr, "Trie nodes: %d\n", trie_nodes);
    }
}