Example #1
0
/*
 * Build a new Darray containing the elements of dyn_ary in the same order.
 * Elements are transferred as returned by Darray_get (no per-element copy
 * is made here).  Returns the newly created array.
 */
Darray Darray_copy(Darray dyn_ary)
    {
    Darray clone = Darray_create();
    unsigned int total = Darray_len(dyn_ary);
    unsigned int pos = 0;

    /* Tell the new array how many elements to expect before filling it. */
    Darray_hint(clone, (unsigned int)0, total);
    while (pos < total)
        {
        Darray_addh(clone, Darray_get(dyn_ary, pos));
        ++pos;
        }
    return clone;
    }
Example #2
0
/*--------------------------------------------------*/
/*
 * Create a new Darray and append every element of `a` to it, in index
 * order.  The stored element values themselves are shared, not copied.
 * Returns the new array.
 */
Darray Darray_duplicate(Darray a)
    {
    int total;
    int pos;
    Darray copy;

    total = Darray_len(a);
    copy = Darray_create();
    Darray_hint(copy, 0, total);

    pos = 0;
    while (pos < total)
        {
        Darray_addh(copy, (char*)Darray_get(a, pos));
        pos++;
        }
    return copy;
    }
Example #3
0
/*
 * Replace the element stored at index 0 of the heap with `value`, re-sift
 * the heap, and return the element that was at index 0 beforehand.
 *
 * NOTE(review): no emptiness check is made; what Darray_get returns for an
 * empty heap is not visible here -- confirm before calling on an empty heap.
 */
void *Heap_pop_replace(Heap *heap, void *value)
{
    assert(heap);

    void *previousTop = Darray_get(heap->contents, 0);

    /* Overwrite the top slot, then restore ordering from the top down. */
    Darray_set(heap->contents, 0, value);
    Heap_sift_down(heap);

    return previousTop;
}
Example #4
0
// Builds a new heap holding every element of `left` followed by every
// element of `right`, pushing them one at a time.
// The new heap takes its type and cmp from `left`; both inputs must have
// the same elementSize (asserted). Returns NULL if Heap_create fails.
// TODO: there is probably a smarter heap-merging algorithm; look it up later.
Heap *Heap_merge(Heap *left, Heap *right)
{
    Heap *merged;
    int idx;

    assert(left);
    assert(right);
    assert(left->elementSize == right->elementSize);

    merged = Heap_create(left->elementSize, left->type, left->cmp);
    if (merged == NULL)
        return NULL;

    idx = 0;
    while (idx < Heap_count(left)) {
        Heap_push(merged, Darray_get(left->contents, idx));
        idx++;
    }

    idx = 0;
    while (idx < Heap_count(right)) {
        Heap_push(merged, Darray_get(right->contents, idx));
        idx++;
    }

    return merged;
}
Example #5
0
/*
 * Remove and return the element at index 0 of the heap.
 *
 * The last array element is popped off, written into slot 0, and the heap
 * is then re-sifted from the top.
 *
 * NOTE(review): when the heap holds exactly one element, the pop empties
 * the array and the following Darray_set writes index 0 of an empty array.
 * Whether that is harmless depends on Darray_set, which is not visible
 * here -- confirm.  The empty-heap case is likewise unchecked.
 */
void *Heap_pop(Heap *heap)
{
    assert(heap);

    void *top = Darray_get(heap->contents, 0);
    void *tail = Darray_pop(heap->contents);

    Darray_set(heap->contents, 0, tail);
    Heap_sift_down(heap);

    return top;
}
Example #6
0
/*--------------------------------------------------*/
/*
 * Insert `data` into `dar` so that it ends up at position `index`.
 *
 *   index <= 0            -> added via Darray_addl
 *   index >= current len  -> added via Darray_addh
 *   otherwise             -> the array is grown by one slot, elements from
 *                            `index` upward are shifted one place higher,
 *                            and `data` is stored at `index`.
 *
 * Returns `dar`.
 */
Darray Darray_insert(Darray dar, int index, VOIDP data)
    {
    int count = Darray_len(dar);
    int dest;

    if (index <= 0)
        {
        Darray_addl(dar, data);
        return dar;
        }
    if (index >= count)
        {
        Darray_addh(dar, data);
        return dar;
        }

    /* Grow by one placeholder slot, then shift the tail up from the top. */
    Darray_addh(dar, NULL);
    for (dest = count; dest > index; dest--)
        Darray_set(dar, dest, Darray_get(dar, dest - 1));
    Darray_set(dar, index, data);
    return dar;
    }
Example #7
0
/*
 * Create a heap with the array's elementSize and the given type and
 * comparison function, then push every element of `array` onto it in
 * index order.  Returns the new heap, or NULL if Heap_create fails.
 */
Heap *Heap_from_darray(Darray *array, int type, Heap_compare cmp)
{
    Heap *result;
    int idx = 0;

    assert(array);

    result = Heap_create(array->elementSize, type, cmp);
    if (result == NULL)
        return NULL;

    while (idx < Darray_count(array)) {
        Heap_push(result, Darray_get(array, idx));
        idx++;
    }

    return result;
}
Example #8
0
/*
 * Remove the element at position `index` from `dar` and return `dar`.
 *
 * An out-of-range index (negative, or >= the current length, which
 * includes every index on an empty array) leaves the array untouched.
 * Previously a negative index made Darray_set write at index `index`,
 * and an index past the end silently removed the last element.
 *
 * The first and last positions are removed directly with Darray_reml /
 * Darray_remh; any interior position is removed by shifting the tail
 * down one slot and then dropping the now-duplicated last element.
 */
Darray Darray_remove(Darray dar, int index)
    {
    int s, d, l;
    l = Darray_len(dar);

    /* Nothing to remove for an index that does not name an element. */
    if (index < 0 || index >= l)
        return dar;

    if (index == (l - 1))
        {
        Darray_remh(dar);
        return dar;
        }
    if (index == 0)
        {
        Darray_reml(dar);
        return dar;
        }

    /* Close the gap: copy each following element one position lower. */
    for (s = index + 1, d = index; s < l; s++, d++)
        Darray_set(dar, d, (VOIDP)Darray_get(dar, s));

    Darray_remh(dar);
    return dar;
    }
/*
 * Learns tag-correction rules by repeatedly comparing a guessed tagging
 * against a reference tagging.
 *
 * Arguments (as used below):
 *   argv[1]  reference corpus, read once as word/tag tokens
 *   argv[2]  guessed corpus, re-read every round and overwritten with the
 *            output of the external `fix-kbest-rule-learn` command
 *   argv[3]  file to which the winning rule of each round is appended
 *   argv[4]  file whose lines are "word tag tag ..." (fills WORDS and
 *            SEENTAGGING)
 *
 * Each round: count (guess, correct) mismatches, write them to the scratch
 * file AANEWRESTRJUNKKK and sort it with `sort -rn`, gather context counts
 * for every sufficiently frequent mismatch, let check_counts() pick the
 * best-scoring candidate (via the globals localbest / localbestthing /
 * localdif), apply it through the external command, and continue while
 * the best score stays above THRESHOLD.
 *
 * Returns 0 on normal termination, 1 on a usage error or when a file
 * cannot be opened.
 *
 * NOTE(review): the command line passed to system() is built with
 * sprintf() from argv and from rule text into a fixed 500-byte buffer;
 * this is neither length-checked nor shell-safe.
 */
int main(int             argc, char           *argv[])
{

char onewdbfr[512],onewdaft[512];
char onetagbfr[512],twotagbfr[512],onetagaft[512],twotagaft[512];
char *freshcharvar;
Darray errorlist,temperrorkey,temperrorval;
Registry errorlistcount,SEENTAGGING,WORDS;

FILE *correct_file, *guess_file, *error_list,*correct_out;
char line[5000];  /* input line buffer */
char **split_ptr,**split_ptr2;

char wdpair[1024];
char *tempstr,*tempstr2;
float CONTINUE = 10000.0;
int count2,numwrong,lengthcount;
unsigned int count;
char globalprint[500];
char systemcall[500];
char forpasting[500];
char forpasting2[500];
float globalbest = 0.0;
char flag[20];
Registry currentwd,currentwd2;
Registry always,always2;
Registry wdnexttag,wdnexttag2,wdprevtag,wdprevtag2;
Registry rbigram,lbigram,rbigram2,lbigram2;
Registry next1tag,next1tag2,prev1tag,prev1tag2;
Registry next1or2tag,next1or2tag2,prev1or2tag,prev1or2tag2;
Registry next1or2or3tag,next1or2or3tag2,prev1or2or3tag,prev1or2or3tag2;
Registry next1wd,next1wd2,prev1wd,prev1wd2;
Registry next1or2wd,next1or2wd2,prev1or2wd,prev1or2wd2;
Registry nextbigram,nextbigram2,prevbigram,prevbigram2;
Registry surroundtag,surroundtag2;
Registry next2tag,next2tag2,prev2tag,prev2tag2;
Registry next2wd,next2wd2,prev2wd,prev2wd2;
char globaldif[20];
int printscore;
FILE *allowedmovefile;
char **perl_split_ptr,**perl_split_ptr2,*atempstr,atempstr2[1024];
char space[500];

/* argv[1]..argv[4] are all dereferenced below. */
if (argc < 5) {
  fprintf(stderr,
          "usage: <correct-corpus> <guess-corpus> <rule-output> <allowed-tags-file>\n");
  return 1;
}

SEENTAGGING = Registry_create(Registry_strcmp,Registry_strhash);
Registry_size_hint(SEENTAGGING,GUESSNUMWORDS);
WORDS = Registry_create(Registry_strcmp,Registry_strhash);
Registry_size_hint(WORDS,GUESSNUMWORDS);

/* Load the allowed-tag file: the first field of each line goes into WORDS,
   and every "first-field other-field" pair goes into SEENTAGGING. */
allowedmovefile = fopen(argv[4], "r");
if (allowedmovefile == NULL) {
  fprintf(stderr,"cannot open %s\n",argv[4]);
  return 1;
}
          while(fgets(line,sizeof(line),allowedmovefile) != NULL) {
            if (not_just_blank(line)) {
              line[strlen(line) - 1] = '\0';

              perl_split_ptr = perl_split(line);
              perl_split_ptr2 = perl_split_ptr;
              ++perl_split_ptr;
              atempstr= mystrdup(*perl_split_ptr2);
              Registry_add(WORDS,atempstr,(char *)1);
              while(*perl_split_ptr != NULL) {
                sprintf(space,"%s %s",*perl_split_ptr2,
                        *perl_split_ptr);
                atempstr=mystrdup(space);

                Registry_add(SEENTAGGING,atempstr,(char *)1);
                ++perl_split_ptr; }
              free(*perl_split_ptr2);
              free(perl_split_ptr2);
            }

          }
fclose(allowedmovefile);


system("/bin/rm AANEWRESTRJUNKKK");
correct_tag_corpus = Darray_create();
Darray_hint(correct_tag_corpus,100,400000);
word_corpus = Darray_create();
Darray_hint(word_corpus,100,400000);


correct_file = fopen(argv[1],"r");
if (correct_file == NULL) {
  fprintf(stderr,"cannot open %s\n",argv[1]);
  return 1;
}


/* Read the reference corpus.  Every line contributes two `staart` entries
   followed by one entry per token to both word_corpus and
   correct_tag_corpus. */
while(fgets(line,sizeof(line),correct_file) != NULL) {
  Darray_addh(correct_tag_corpus,staart);
  Darray_addh(correct_tag_corpus,staart);
  Darray_addh(word_corpus,staart);
  Darray_addh(word_corpus,staart);
  line[strlen(line)-1] = '\0';
  split_ptr = perl_split_independent(line);
  split_ptr2 = split_ptr;   /* remember the array so it can be released */
  while (*split_ptr != NULL) {
    Darray_addh(word_corpus,*split_ptr);
    /* Split the token in place at its '/': the word stays in word_corpus
       and the text after the '/' becomes the tag.
       NOTE(review): assumes every token contains a '/' after its first
       character; otherwise this scan runs past the end of the string. */
    while ((*(++*split_ptr)) != '/') {
    }
    **split_ptr = '\0';
    Darray_addh(correct_tag_corpus,++*split_ptr);
    ++split_ptr;
  }
  /* The token strings are now owned by the corpora; only the pointer
     array itself is no longer needed. */
  free(split_ptr2);
}
fclose(correct_file);


printf("READ IN CORRECT FILE\n");




while(CONTINUE > THRESHOLD) {

  /* Re-read the current guessed tagging; only the tag part is kept. */
  guess_tag_corpus = Darray_create();
  Darray_hint(guess_tag_corpus,100,400000);
  guess_file  = fopen(argv[2],"r");
  if (guess_file == NULL) {
    fprintf(stderr,"cannot open %s\n",argv[2]);
    return 1;
  }
  while(fgets(line,sizeof(line),guess_file) != NULL) {

    Darray_addh(guess_tag_corpus,staart);
    Darray_addh(guess_tag_corpus,staart);
    line[strlen(line)-1] = '\0';
    split_ptr = perl_split_independent(line);
    split_ptr2 = split_ptr;
    while (*split_ptr != NULL) {
      tempstr = strtok(*split_ptr,"/");
      tempstr = strtok(NULL,"/");
      tempstr2  = mystrdup(tempstr);
      Darray_addh(guess_tag_corpus,tempstr2);
      free(*split_ptr);
      ++split_ptr;
    }
    free(split_ptr2);
  }
  fclose(guess_file);

printf("READ IN BAD FILE\n");

  errorlist = Darray_create();
  Darray_hint(errorlist,10,500);
  temperrorkey = Darray_create();
  temperrorval = Darray_create();
  Darray_hint(temperrorkey,10,500);
  Darray_hint(temperrorval,10,500);

  init_hash(&errorlistcount,500);


  /* Count each "guess correct" mismatch pair. */
  printscore=0;
  for(count=0;count<Darray_len(guess_tag_corpus);++count) {
    if
      (! is_tagged_with((char *)Darray_get(correct_tag_corpus,count),(char *)Darray_get(guess_tag_corpus,count))) {
        ++printscore;
        freshcharvar =
          mystrdup(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count)));
        sprintf(forpasting,"%s %s",freshcharvar,
                                       (char *)Darray_get(correct_tag_corpus,count));
        /* The copy has been formatted into forpasting; it was previously
           leaked on every mismatch. */
        free(freshcharvar);
        increment_array_create(&errorlistcount,forpasting);
      }
  }

  error_list = fopen("AANEWRESTRJUNKKK","a");
  if (error_list == NULL) {
    fprintf(stderr,"cannot open AANEWRESTRJUNKKK for writing\n");
    return 1;
  }
  Registry_fetch_contents(errorlistcount,temperrorkey,temperrorval);
  for (count=0;count<Darray_len(temperrorkey);++count) {
    if (*(int *)(char *)Darray_get(temperrorval,count) > THRESHOLD)
      fprintf(error_list,"%d %s\n",*(int *)(char *)Darray_get(temperrorval,count),
                            (char *)Darray_get(temperrorkey,count));
    free((char *)Darray_get(temperrorval,count));
    free((char *)Darray_get(temperrorkey,count));
  }
  fclose(error_list);
  Darray_destroy(temperrorval);
  Darray_destroy(temperrorkey);
  Registry_destroy(errorlistcount);

  printf("NUM ERRORS: %d\n",printscore);

  /* Sort the mismatch list by descending count using external tools. */
  system("cat AANEWRESTRJUNKKK | sort -rn > AANEWRESTRJUNKKK2");
  system("mv AANEWRESTRJUNKKK2 AANEWRESTRJUNKKK");

  error_list = fopen("AANEWRESTRJUNKKK","r");
  if (error_list == NULL) {
    fprintf(stderr,"cannot open AANEWRESTRJUNKKK for reading\n");
    return 1;
  }
  while(fgets(line,sizeof(line),error_list) != NULL) {
    line[strlen(line)-1] = '\0';
    tempstr = mystrdup(line);
    Darray_addh(errorlist,tempstr);
  }
  fclose(error_list);
  system("/bin/rm AANEWRESTRJUNKKK");

  globalbest= 0;
  strcpy(globalprint,"");
  /* globaldif is printed below before any candidate has been chosen, so it
     must start out as a valid (empty) string. */
  strcpy(globaldif,"");


  for (count=0;count<Darray_len(errorlist);++count) {

    localbest =0;
    strcpy(localbestthing,"");

    /* Each entry is "count wrongtag righttag". */
    split_ptr = perl_split_independent((char *)Darray_get(errorlist,count));
    wrong = split_ptr[1];
    right = split_ptr[2];
    numwrong = atoi(split_ptr[0]);
    if (numwrong > THRESHOLD3) {

      printf("WRONG,RI: %s %s\n",wrong,right);
      printf("GLOBALBEST, GLOBALPRINT, GLOBALDIF: %f %s %s\n",globalbest,globalprint,globaldif);

      /* One pair of tables per context template: the unsuffixed table is
         filled from BADMATCH positions, the "2" table from GOODMATCH
         positions.  check_counts() destroys both tables of each pair. */
      init_hash(&always,NUMTAGS/2);
      init_hash(&always2,NUMTAGS/2);
      init_hash(&rbigram,(NUMWDS*NUMWDS)/4);
      init_hash(&lbigram,(NUMWDS*NUMWDS)/4);
      init_hash(&rbigram2,(NUMWDS*NUMWDS)/4);
      init_hash(&lbigram2,(NUMWDS*NUMWDS)/4);
      init_hash(&wdnexttag,(NUMWDS*NUMTAGS)/4);
      init_hash(&wdnexttag2,(NUMWDS*NUMTAGS)/4);
      init_hash(&wdprevtag,(NUMWDS*NUMTAGS)/4);
      init_hash(&wdprevtag2,(NUMWDS*NUMTAGS)/4);
      init_hash(&next1tag,NUMTAGS/2);
      init_hash(&next1tag2,NUMTAGS/2);
      init_hash(&prev1tag,NUMTAGS/2);
      init_hash(&prev1tag2,NUMTAGS/2);
      init_hash(&next1or2tag,NUMTAGS/2);
      init_hash(&next1or2tag2,NUMTAGS/2);
      init_hash(&prev1or2tag,NUMTAGS/2);
      init_hash(&prev1or2tag2,NUMTAGS/2);
      init_hash(&next1wd,NUMWDS/2);
      init_hash(&next1wd2,NUMWDS/2);
      init_hash(&prev1wd,NUMWDS/2);
      init_hash(&prev1wd2,NUMWDS/2);
      init_hash(&currentwd,NUMWDS/2);
      init_hash(&currentwd2,NUMWDS/2);
      init_hash(&next1or2wd,NUMWDS/2);
      init_hash(&next1or2wd2,NUMWDS/2);
      init_hash(&prev1or2wd,NUMWDS/2);
      init_hash(&prev1or2wd2,NUMWDS/2);
      init_hash(&next1or2or3tag,NUMTAGS/2);
      init_hash(&next1or2or3tag2,NUMTAGS/2);
      init_hash(&prev1or2or3tag,NUMTAGS/2);
      init_hash(&prev1or2or3tag2,NUMTAGS/2);
      init_hash(&nextbigram,NUMTAGS);
      init_hash(&nextbigram2,NUMTAGS);
      init_hash(&prevbigram,NUMTAGS);
      init_hash(&prevbigram2,NUMTAGS);
      init_hash(&surroundtag,NUMTAGS);
      init_hash(&surroundtag2,NUMTAGS);
      init_hash(&next2tag,NUMTAGS/2);
      init_hash(&next2tag2,NUMTAGS/2);
      init_hash(&prev2tag,NUMTAGS/2);
      init_hash(&prev2tag2,NUMTAGS/2);
      init_hash(&next2wd,NUMWDS/2);
      init_hash(&next2wd2,NUMWDS/2);
      init_hash(&prev2wd,NUMWDS/2);
      init_hash(&prev2wd2,NUMWDS/2);


      lengthcount = Darray_len(correct_tag_corpus);
      for(count2=0;count2<lengthcount;++count2){
        /* Classify this corpus position for the (wrong, right) pair:
             NOMATCH   - word is in WORDS but "word right" is not in
                         SEENTAGGING, or none of the cases below apply
             BADMATCH  - correct tag is `right`, first guess tag is `wrong`,
                         and the guess is not tagged with `right`
             GOODMATCH - as BADMATCH but the correct tag is not `right` */
        sprintf(atempstr2,"%s %s",(char *)Darray_get(word_corpus,count2),right);
        if (Registry_get(WORDS,(char *)Darray_get(word_corpus,count2)) &&
            ! Registry_get(SEENTAGGING,atempstr2))
          strcpy(flag,"NOMATCH");
        else if
          (strcmp((char *)Darray_get(correct_tag_corpus,count2),right) == 0 &&
           (strcmp
            (first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2)),wrong)
            ==  0)
            &&
            (! is_tagged_with(right,(char *)Darray_get(guess_tag_corpus,count2))))
          strcpy(flag,"BADMATCH");
        else if
          (strcmp((char *)Darray_get(correct_tag_corpus,count2),right) != 0 &&
           (strcmp
            (first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2)),wrong)
            ==  0)
           &&
           (! is_tagged_with(right,(char *)Darray_get(guess_tag_corpus,count2))))
          strcpy(flag,"GOODMATCH");
        else
          strcpy(flag,"NOMATCH");

        if (strcmp(flag,"BADMATCH") == 0) {
          increment_array(&always,"DUMMY");
          increment_array(&currentwd,(char *)Darray_get(word_corpus,count2));
          if (count2 != lengthcount-1) {
            strcpy(onewdaft,(char *)Darray_get(word_corpus,count2+1));
            strcpy(onetagaft,
                   first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            sprintf(wdpair,"%s %s",(char *)Darray_get(word_corpus,count2),
                                  (char *)Darray_get(word_corpus,count2+1));
            increment_array_create(&rbigram,wdpair);
            sprintf(wdpair,"%s %s",(char *)Darray_get(word_corpus,count2),
                    first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array_create(&wdnexttag,wdpair);
            increment_array_create(&next1or2tag,
                            first_tag_nospace(
                                (char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array_create(&next1or2or3tag,
                            first_tag_nospace(
                                (char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array_create(&next1tag,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array(&next1wd,(char *)Darray_get(word_corpus,count2+1));
            increment_array(&next1or2wd,(char *)Darray_get(word_corpus,count2+1));
          }
          if (count2 < lengthcount-2) {
            strcpy(twotagaft,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            strcpy(forpasting2,
                   first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            sprintf(forpasting,"%s %s",forpasting2,
                                       first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            increment_array_create(&nextbigram,forpasting);
            increment_array_create(&next2tag,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            increment_array(&next2wd,(char *)Darray_get(word_corpus,count2+2));
            /* Skip a tag already counted for this position's window. */
            if
              (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)),                 onetagaft) != 0)
            {
              increment_array_create(&next1or2tag,
                  first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));

              increment_array_create(&next1or2or3tag,
                  first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            }
            if (strcmp((char *)Darray_get(word_corpus,count2+2),onewdaft) != 0)
              increment_array(&next1or2wd,(char *)Darray_get(word_corpus,count2+2));
          }
          if (count2 < lengthcount-3) {
            if (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+3)),onetagaft) != 0
                &&
                strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+3)),twotagaft) != 0)
              increment_array_create(&next1or2or3tag,
                 first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+3)));
          }
          if (count2 != 0) {
            strcpy(onewdbfr,(char *)Darray_get(word_corpus,count2-1));
            strcpy(onetagbfr,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            sprintf(wdpair,"%s %s",(char *)Darray_get(word_corpus,count2-1),
                                  (char *)Darray_get(word_corpus,count2));
            increment_array_create(&lbigram,wdpair);
            sprintf(wdpair,"%s %s",first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)),
                                  (char *)Darray_get(word_corpus,count2));
            increment_array_create(&wdprevtag,wdpair);
            increment_array_create(&prev1tag,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            increment_array(&prev1wd,(char *)Darray_get(word_corpus,count2-1));
            increment_array(&prev1or2wd,(char *)Darray_get(word_corpus,count2-1));
            increment_array_create(&prev1or2tag,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            increment_array_create(&prev1or2or3tag,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            if (count2 < lengthcount-1) {
              strcpy(forpasting2,
                     first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
              sprintf(forpasting,"%s %s",forpasting2,
                                       first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
              increment_array_create(&surroundtag,forpasting);
            }
          }
          if (count2 > 1) {
            strcpy(twotagbfr,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            strcpy(forpasting2,
                   first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            sprintf(forpasting,"%s %s",forpasting2,
                                       first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            increment_array_create(&prevbigram,forpasting);
            increment_array_create(&prev2tag,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            increment_array(&prev2wd,(char *)Darray_get(word_corpus,count2-2));
            if (strcmp(first_tag_nospace(
                 (char *)Darray_get(guess_tag_corpus,count2-2)),onetagbfr) != 0){
              increment_array_create(&prev1or2tag,
                                     first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
              increment_array_create(&prev1or2or3tag,
                                     first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            }
            if (strcmp((char *)Darray_get(word_corpus,count2-2),onewdbfr) !=
                0)
              increment_array(&prev1or2wd,(char *)Darray_get(word_corpus,count2-2));
          }
          if (count2 > 2) {
             if (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-3)),onetagbfr) != 0
                &&
                strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-3)),twotagbfr) != 0)
               increment_array_create(&prev1or2or3tag,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-3)));
          }
        }


        else if (strcmp(flag,"GOODMATCH") == 0) {
          increment_array(&always2,"DUMMY");
          increment_array(&currentwd2,(char *)Darray_get(word_corpus,count2));
          if (count2 != lengthcount-1) {
            strcpy(onewdaft,(char *)Darray_get(word_corpus,count2+1));
            strcpy(onetagaft,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            sprintf(wdpair,"%s %s",(char *)Darray_get(word_corpus,count2),
                                  (char *)Darray_get(word_corpus,count2+1));
            increment_array_create(&rbigram2,wdpair);
            sprintf(wdpair,"%s %s",(char *)Darray_get(word_corpus,count2),
                                  first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array_create(&wdnexttag2,wdpair);
            increment_array_create(&next1tag2,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array(&next1wd2,(char *)Darray_get(word_corpus,count2+1));
            increment_array(&next1or2wd2,(char *)Darray_get(word_corpus,count2+1));
            increment_array_create(&next1or2tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            increment_array_create(&next1or2or3tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
          }
          if (count2 < lengthcount-2) {
            strcpy(twotagaft,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            strcpy(forpasting2,
                   first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
            sprintf(forpasting,"%s %s",forpasting2,
                                       first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            increment_array_create(&nextbigram2,forpasting);
            increment_array_create(&next2tag2,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            increment_array(&next2wd2,(char *)Darray_get(word_corpus,count2+2));
            if (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)),onetagaft) !=0) {
              increment_array_create(&next1or2tag2,
                     first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
              increment_array_create(&next1or2or3tag2,
                    first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+2)));
            }
            if (strcmp((char *)Darray_get(word_corpus,count2+2),onewdaft) !=0)
              increment_array(&next1or2wd2,(char *)Darray_get(word_corpus,count2+2));
          }
          if (count2 < lengthcount-3) {
            if (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+3)),onetagaft) !=0
                &&
                strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+3)),twotagaft) !=0 )
              increment_array_create(&next1or2or3tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+3)));
          }
          if (count2 != 0) {
            strcpy(onewdbfr,(char *)Darray_get(word_corpus,count2-1));
            /* Store the first tag only, exactly as the BADMATCH branch does:
               onetagbfr is compared against first_tag_nospace() results
               below, so holding the full tag string here made those
               duplicate checks inconsistent between the two branches. */
            strcpy(onetagbfr,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            sprintf(wdpair,"%s %s",(char *)Darray_get(word_corpus,count2-1),
                                  (char *)Darray_get(word_corpus,count2));
            increment_array_create(&lbigram2,wdpair);
            sprintf(wdpair,"%s %s",first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)),
                                  (char *)Darray_get(word_corpus,count2));
            increment_array_create(&wdprevtag2,wdpair);
            increment_array_create(&prev1tag2,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            increment_array(&prev1wd2,(char *)Darray_get(word_corpus,count2-1));
            increment_array(&prev1or2wd2,(char *)Darray_get(word_corpus,count2-1));
            increment_array_create(&prev1or2tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            increment_array_create(&prev1or2or3tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            if (count2 < lengthcount-1) {
              strcpy(forpasting2,
                     first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
              sprintf(forpasting,"%s %s",forpasting2,
                                       first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2+1)));
              increment_array_create(&surroundtag2,forpasting);
            }
          }
          if (count2 >1 ) {
            strcpy(twotagbfr,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            strcpy(forpasting2,
                   first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            sprintf(forpasting,"%s %s",forpasting2,
                                       first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-1)));
            increment_array_create(&prevbigram2,forpasting);
            increment_array_create(&prev2tag2,first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            increment_array(&prev2wd2,(char *)Darray_get(word_corpus,count2-2));
            if (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)),onetagbfr) != 0){
              increment_array_create(&prev1or2tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
              increment_array_create(&prev1or2or3tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-2)));
            }
            if (strcmp((char *)Darray_get(word_corpus,count2-2),onewdbfr) != 0)
              increment_array(&prev1or2wd2,(char *)Darray_get(word_corpus,count2-2));
          }
          if (count2 > 2) {
            if (strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-3)),onetagbfr) != 0
                &&
                strcmp(first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-3)),twotagbfr) != 0)
              increment_array_create(&prev1or2or3tag2,
                            first_tag_nospace((char *)Darray_get(guess_tag_corpus,count2-3)));
          }
        }
      }


      /* Score every template; each call may update localbest,
         localbestthing and localdif, and destroys both tables passed in. */
      check_counts(&always,&always2,"ALWAYS");
      check_counts(&prev1tag,&prev1tag2,"PREVTAG");
      check_counts(&next1tag,&next1tag2,"NEXTTAG");
      check_counts(&next1or2tag,&next1or2tag2,"NEXT1OR2TAG");
      check_counts(&prev1or2tag,&prev1or2tag2,"PREV1OR2TAG");
      check_counts(&next1wd,&next1wd2,"NEXTWD");
      check_counts(&currentwd,&currentwd2,"CURRENTWD");
      check_counts(&prev1wd,&prev1wd2,"PREVWD");
      check_counts(&rbigram,&rbigram2,"RBIGRAM");
      check_counts(&lbigram,&lbigram2,"LBIGRAM");
      check_counts(&wdnexttag,&wdnexttag2,"WDNEXTTAG");
      check_counts(&wdprevtag,&wdprevtag2,"WDPREVTAG");
      check_counts(&next1or2wd,&next1or2wd2,"NEXT1OR2WD");
      check_counts(&prev1or2wd,&prev1or2wd2,"PREV1OR2WD");
      check_counts(&next1or2or3tag,&next1or2or3tag2,"NEXT1OR2OR3TAG");
      check_counts(&prev1or2or3tag,&prev1or2or3tag2,"PREV1OR2OR3TAG");
      check_counts(&prevbigram,&prevbigram2,"PREVBIGRAM");
      check_counts(&nextbigram,&nextbigram2,"NEXTBIGRAM");
      check_counts(&surroundtag,&surroundtag2,"SURROUNDTAG");
      check_counts(&next2tag,&next2tag2,"NEXT2TAG");
      check_counts(&prev2tag,&prev2tag2,"PREV2TAG");
      check_counts(&next2wd,&next2wd2,"NEXT2WD");
      check_counts(&prev2wd,&prev2wd2,"PREV2WD");



      if (localbest > globalbest) {
        globalbest = localbest;
        strcpy(globaldif,localdif);
        strcpy(globalprint,localbestthing);
      }
    }

    /* Release this entry's three fields and their pointer array once per
       iteration.  These frees used to sit after the loop, which leaked
       every entry but the last and freed stale pointers when the error
       list was empty.  `wrong` and `right` point into these fields and
       must not be used past this point. */
    free(split_ptr[0]);
    free(split_ptr[1]);
    free(split_ptr[2]);
    free(split_ptr);
  }

  /* Single quotes would end the quoted shell argument below, so they are
     swapped for '\b' while the command runs and restored afterwards. */
  for (count=0;count<strlen(globalprint);++count)
    if (*(globalprint+count) == '\'')
      *(globalprint+count) = '\b';
  sprintf(systemcall,"cat %s | fix-kbest-rule-learn \'%s\' %s > aanewmynewtagggs",
          argv[2],globalprint,argv[4]);
  system(systemcall);
  for (count=0;count<strlen(globalprint);++count)
    if (*(globalprint+count) == '\b')
      *(globalprint+count) = '\'';
  sprintf(systemcall,"mv aanewmynewtagggs %s",argv[2]);
  system(systemcall);
  correct_out = fopen(argv[3],"a");
  if (correct_out == NULL) {
    fprintf(stderr,"cannot open %s\n",argv[3]);
    return 1;
  }
  fprintf(correct_out,"%s\n",globalprint);
  fclose(correct_out);
  CONTINUE = globalbest;
  /* Entries whose text is "STAART" are skipped rather than freed. */
  for (count=0;count<Darray_len(guess_tag_corpus);++count)
    if (strcmp((tempstr=(char *)Darray_get(guess_tag_corpus,count)),"STAART") != 0)
      free(tempstr);
  Darray_destroy(guess_tag_corpus);
  for (count=0;count<Darray_len(errorlist);++count)
    free((char *)Darray_get(errorlist,count));
  Darray_destroy(errorlist);

}
return 0;
}
/*
 * Scores one context template and releases its two count tables.
 *
 * For every key in *goodhash whose count exceeds THRESHOLD2, computes
 *     count_in_goodhash / (count_in_badhash + 1)
 * (a key missing from *badhash counts as 0).  When that ratio is greater
 * than the global `localbest` and below 1.0, it becomes the new
 * `localbest`; `localdif` receives the two counts and `localbestthing`
 * receives "wrong right label key", built from the globals `wrong` and
 * `right`.
 *
 * Every count value fetched from either table is freed.  Keys are freed
 * as well for the labels listed in owned_key_labels below.  Finally the
 * four temporary arrays and both registries are destroyed, so neither
 * *goodhash nor *badhash may be used after this call.
 */
void check_counts(Registry *goodhash,Registry *badhash,const char *label)
{
  /* Labels whose table keys are freed here along with the counts. */
  static const char *const owned_key_labels[] = {
    "PREVBIGRAM", "NEXTBIGRAM", "WDPREVTAG", "WDNEXTTAG",
    "RBIGRAM", "LBIGRAM",
    "NEXTTAG", "NEXT2TAG", "NEXT1OR2TAG", "NEXT1OR2OR3TAG",
    "PREVTAG", "PREV2TAG", "PREV1OR2TAG", "PREV1OR2OR3TAG",
    "SURROUNDTAG"
  };
  Darray good_keys, good_vals, bad_keys, bad_vals;
  unsigned int idx;
  int free_keys = 0;

  for (idx = 0; idx < sizeof owned_key_labels / sizeof owned_key_labels[0]; ++idx) {
    if (strcmp(label, owned_key_labels[idx]) == 0) {
      free_keys = 1;
      break;
    }
  }

  good_keys = Darray_create();
  Darray_hint(good_keys, 10, Registry_entry_count(*goodhash));
  good_vals = Darray_create();
  Darray_hint(good_vals, 10, Registry_entry_count(*goodhash));
  bad_keys = Darray_create();
  Darray_hint(bad_keys, 10, Registry_entry_count(*badhash));
  bad_vals = Darray_create();
  Darray_hint(bad_vals, 10, Registry_entry_count(*badhash));

  Registry_fetch_contents(*goodhash, good_keys, good_vals);
  Registry_fetch_contents(*badhash, bad_keys, bad_vals);

  for (idx = 0; idx < Darray_len(good_keys); ++idx) {
    char *key = (char *)Darray_get(good_keys, idx);
    int *bad_entry = (int *)Registry_get(*badhash, key);
    int *good_entry = (int *)(char *)Darray_get(good_vals, idx);
    int bad_count = (bad_entry == NULL) ? 0 : *bad_entry;
    float denominator = (float)bad_count;

    /* +1 keeps the division defined when the key is absent from *badhash. */
    denominator += 1.0;

    if (*good_entry > THRESHOLD2) {
      float ratio = (float)*good_entry / denominator;

      if (ratio > localbest && ratio < 1.0) {
        localbest = ratio;
        sprintf(localdif, "%d %f", *good_entry, denominator);
        sprintf(localbestthing, "%s %s %s %s", wrong, right, label, key);
      }
    }

    free((char *)good_entry);
    if (free_keys)
      free(key);
  }

  for (idx = 0; idx < Darray_len(bad_keys); ++idx) {
    free((char *)Darray_get(bad_vals, idx));
    if (free_keys)
      free((char *)Darray_get(bad_keys, idx));
  }

  Darray_destroy(good_vals);
  Darray_destroy(bad_vals);
  Darray_destroy(good_keys);
  Darray_destroy(bad_keys);
  Registry_destroy(*goodhash);
  Registry_destroy(*badhash);
}