C++ (Cpp) exalloc Beispiele

Beispiel #1

0

Datei anzeigen

Datei: api.c Projekt: mclumd/Alfred

/*
 * Build a Linkage object for entry k of sent->link_info.
 *
 * k must satisfy 0 <= k < sent->num_linkages_post_processed (asserted).
 * The Linkage and its word array are obtained with exalloc. After the
 * links are extracted and the chosen words computed, the sublinkages are
 * filled in by either the fat or the thin extractor, and the linkage is
 * post-processed when the dictionary has a postprocessor.
 *
 * Returns the newly allocated Linkage.
 */
Linkage linkage_create(int k, Sentence sent, Parse_Options opts) {
    Linkage lkg;

    assert((k < sent->num_linkages_post_processed) && (k >= 0), "index out of range");

    /* exalloc is used because this object is external to the parser itself. */
    lkg = (Linkage) exalloc(sizeof(struct Linkage_s));

    /* Context the linkage was created from. */
    lkg->sent = sent;
    lkg->opts = opts;
    lkg->info = sent->link_info[k];

    /* Word storage: one slot per word of the sentence. */
    lkg->num_words = sent->length;
    lkg->word = (char **) exalloc(lkg->num_words*sizeof(char *));

    /* No sublinkages yet; they are filled in below. */
    lkg->current = 0;
    lkg->num_sublinkages = 0;
    lkg->sublinkage = NULL;
    lkg->unionized = FALSE;

    extract_links(sent->link_info[k].index, sent->null_count, sent->parse_info);
    compute_chosen_words(sent, lkg);

    if (!set_has_fat_down(sent)) {
        extract_thin_linkage(sent, opts, lkg);
    }
    else {
        extract_fat_linkage(sent, opts, lkg);
    }

    if (NULL != sent->dict->postprocessor) {
        linkage_post_process(lkg, sent->dict->postprocessor);
    }

    return lkg;
}

Beispiel #2

0

Datei anzeigen

Datei: constituents.c Projekt: arv100kri/linkparser

/*
 * Allocate a CNode (via exalloc) whose label is a private copy of q.
 * The node starts with no child, no sibling, and start/end set to -1.
 */
static CNode * make_CNode(char *q) {
	CNode * node = exalloc(sizeof(CNode));

	/* +1 for the terminating NUL of the label copy. */
	node->label = (char *) exalloc(sizeof(char)*(strlen(q)+1));
	strcpy(node->label, q);

	node->child = (CNode *) NULL;
	node->next = (CNode *) NULL;
	node->start = node->end = -1;
	return node;
}

Beispiel #3

0

Datei anzeigen

Datei: print-util.c Projekt: suriyadeepan/link-grammar

String * string_new(void)
{
#define INITSZ 30
	String * string;
	string = (String *) exalloc(sizeof(String));
	string->allocated = INITSZ;
	string->p = (char *) exalloc(INITSZ*sizeof(char));
	string->p[0] = '\0';
	string->eos = 0;
	return string;
}

Beispiel #4

0

Datei anzeigen

Datei: analyze-linkage.c Projekt: arv100kri/linkparser

/*
 * Allocate (with exalloc) a Sublinkage with room for pi->N_links links,
 * and clear it with zero_sublinkage().
 *
 * pi->N_links must be below MAX_LINKS. This is asserted up front, before
 * the count is used to size the link array or handed to zero_sublinkage().
 *
 * Returns the new Sublinkage.
 */
static Sublinkage * ex_create_sublinkage(Parse_info pi)
{
	Sublinkage *s;

	/* Validate the count before anything depends on it. */
	assert(pi->N_links < MAX_LINKS, "Too many links");

	s = (Sublinkage *) exalloc (sizeof(Sublinkage));
	s->link = (Link *) exalloc(pi->N_links*sizeof(Link));
	s->num_links = pi->N_links;

	zero_sublinkage(s);

	return s;
}

Beispiel #5

0

Datei anzeigen

Datei: api.c Projekt: mclumd/Alfred

/*
 * Return a deep copy of ppi.
 *
 * The domain_name array and every string in it are duplicated with
 * exalloc. The copy is built in an automatic variable (zeroed first, so
 * any field not set here is zero) rather than in function-static storage,
 * which keeps the function reentrant.
 */
PP_info excopy_pp_info(PP_info ppi) {
     PP_info newppi;
     int i;

     memset(&newppi, 0, sizeof(newppi));

     newppi.num_domains = ppi.num_domains;
     newppi.domain_name = (char **) exalloc(sizeof(char *)*ppi.num_domains);
     for (i=0; i<newppi.num_domains; ++i) {
         /* +1 for the terminating NUL. */
         newppi.domain_name[i] = (char *) exalloc(sizeof(char)*(strlen(ppi.domain_name[i])+1));
         strcpy(newppi.domain_name[i], ppi.domain_name[i]);
     }
     return newppi;
}

Beispiel #6

0

Datei anzeigen

Datei: utilities.c Projekt: arkiran/LinkGrammar

/*
 * Deep-copy a linked list of connectors using exalloc.
 *
 * Each node is copied field-for-field, then given its own copy of the
 * string. Returns the head of the new list, or NULL when c is NULL.
 */
Connector * excopy_connectors(Connector * c) {
    Connector *head = NULL;
    Connector **tail = &head;   /* where the next copied node gets linked */

    for (; c != NULL; c = c->next) {
        Connector *copy = init_connector((Connector *) exalloc(sizeof(Connector)));

        *copy = *c;
        copy->string = (char *) exalloc(sizeof(char)*(strlen(c->string)+1));
        strcpy(copy->string, c->string);

        *tail = copy;
        tail = &copy->next;
    }
    /* Terminate the copy; the last node still points into the source list. */
    *tail = NULL;

    return head;
}

Beispiel #7

0

Datei anzeigen

Datei: utilities.c Projekt: arkiran/LinkGrammar

/*
 * Deep-copy a Link using exalloc: the struct, its name string, and both
 * connector lists (via excopy_connectors). The l and r fields are copied
 * by value. Returns NULL when l is NULL.
 */
Link excopy_link(Link l) {
     Link copy;

     if (NULL == l) return NULL;

     copy = (Link) exalloc(sizeof(struct Link_s));

     /* Private copy of the name, including the terminating NUL. */
     copy->name = (char *) exalloc(sizeof(char)*(strlen(l->name)+1));
     strcpy(copy->name, l->name);

     copy->lc = excopy_connectors(l->lc);
     copy->rc = excopy_connectors(l->rc);
     copy->l = l->l;
     copy->r = l->r;

     return copy;
}

Beispiel #8

0

Datei anzeigen

Datei: print-util.c Projekt: Anusaaraka/anusaaraka

/*
 * Append printf-style formatted text to a growable String.
 *
 * The text is first formatted into a TMPLEN-byte stack buffer. vsnprintf
 * returns the length the complete output needs, which may exceed what fit
 * in that buffer. The String is grown (if required) to hold the complete
 * output; when the stack buffer was too small, the text is formatted a
 * second time directly into the String so nothing is truncated and
 * string->eos always indexes the terminating NUL.
 *
 * If vsnprintf reports an error (negative return), the String is left
 * unchanged.
 */
void append_string(String * string, const char *fmt, ...)
{
#define TMPLEN 1024
	char temp_string[TMPLEN];
	int written;
	size_t templen;
	va_list args;

	va_start(args, fmt);
	written = vsnprintf(temp_string, TMPLEN, fmt, args);
	va_end(args);

	/* A negative result must not be converted to a huge size_t. */
	if (written < 0) return;
	templen = (size_t) written;

	if (string->allocated <= string->eos + templen)
	{
		size_t new_size = 2 * string->allocated + templen + 1;
		char * p = (char *) exalloc(sizeof(char)*new_size);

		strcpy(p, string->p);
		exfree(string->p, sizeof(char)*string->allocated);

		string->p = p;
		string->allocated = new_size;
	}

	if (templen < TMPLEN)
	{
		/* The whole output fit in the stack buffer. */
		strcpy(string->p + string->eos, temp_string);
	}
	else
	{
		/* Output was truncated: format again, straight into the
		 * destination, which now has room for templen+1 bytes. */
		va_start(args, fmt);
		vsnprintf(string->p + string->eos, templen + 1, fmt, args);
		va_end(args);
	}
	string->eos += templen;
}

Beispiel #9

0

Datei anzeigen

Datei: post-process.c Projekt: JustAJoe/link-grammar

/**
 * Store the domain names in the linkage.
 * This is an utter waste of CPU time if one is not interested
 * in printing the domain names.
 *
 * A pp_info array with one entry per link is allocated with exalloc and
 * cleared. If the postprocessor node reports a violation, the entries are
 * left empty. Otherwise each entry gets one name per element of the
 * corresponding d_type_array list; the names are interned in
 * postprocessor->string_set.
 *
 * XXX TODO: refactor, so that this does not need to be called except
 * when printing the domain names.
 */
void linkage_set_domain_names(Postprocessor * postprocessor, Linkage linkage)
{
	PP_node * node;
	size_t lnk;

	if ((NULL == linkage) || (NULL == postprocessor)) return;

	/* The only reason to build the type array is for this function. */
	build_type_array(postprocessor);

	linkage->pp_info = (PP_info *) exalloc(sizeof(PP_info) * linkage->num_links);

	for (lnk = 0; lnk < linkage->num_links; ++lnk)
	{
		PP_info * info = &linkage->pp_info[lnk];

		info->num_domains = 0;
		info->domain_name = NULL;
	}

	/* Copy the post-processing results over into the linkage */
	node = postprocessor->pp_node;
	if (NULL != node->violation) return;

	for (lnk = 0; lnk < linkage->num_links; ++lnk)
	{
		PP_info * info = &linkage->pp_info[lnk];
		D_type_list * d;
		size_t count = 0;
		size_t slot = 0;

		/* First pass: count the domains attached to this link. */
		for (d = node->d_type_array[lnk]; NULL != d; d = d->next) count++;

		info->num_domains = count;
		if (0 < count)
		{
			info->domain_name = (const char **) exalloc(sizeof(const char *)*count);
		}

		/* Second pass: turn each domain type character into a name. */
		for (d = node->d_type_array[lnk]; NULL != d; d = d->next)
		{
			char buff[5];

			snprintf(buff, 5, "%c", d->type);
			info->domain_name[slot] =
			      string_set_add (buff, postprocessor->string_set);
			slot++;
		}
	}
}

Beispiel #10

0

Datei anzeigen

Datei: analyze-linkage.c Projekt: arv100kri/linkparser

/*
 * Replace the name of link l with a fresh exalloc'ed copy of s, releasing
 * the previous name with exfree.
 *
 * The copy is made before the old name is freed, so the call remains valid
 * even if s points at (or into) the current l->name.
 */
static void replace_link_name(Link l, const char *s)
{
	char * t;

	t = (char *) exalloc(sizeof(char)*(strlen(s)+1));
	strcpy(t, s);

	exfree((char *) l->name, sizeof(char)*(strlen(l->name)+1));
	l->name = t;
}

Beispiel #11

0

Datei anzeigen

Datei: post-process.c Projekt: eugeneai/link-grammar

/**
 * Store the domain names in the linkage. These are not needed
 * unless the user asks the domain names to be printed!
 *
 * A zero-filled pp_info array with one entry per link is allocated with
 * exalloc. Unless the postprocessor node reports a violation, each entry
 * receives one single-character name per element of the corresponding
 * d_type_array list; names are interned in postprocessor->string_set.
 */
void linkage_set_domain_names(Postprocessor *postprocessor, Linkage linkage)
{
	PP_node * node;
	size_t n;

	if ((NULL == linkage) || (NULL == postprocessor)) return;

	linkage->pp_info = (PP_info *) exalloc(sizeof(PP_info) * linkage->num_links);
	memset(linkage->pp_info, 0, sizeof(PP_info) * linkage->num_links);

	/* Copy the post-processing results over into the linkage */
	node = postprocessor->pp_node;
	if (NULL != node->violation) return;

	for (n = 0; n < linkage->num_links; ++n)
	{
		PP_info * info = &linkage->pp_info[n];
		D_type_list * d;
		size_t total = 0;
		size_t at = 0;

		/* Count the domains on this link so the array can be sized. */
		for (d = node->d_type_array[n]; NULL != d; d = d->next) total++;

		info->num_domains = total;
		if (0 < total)
		{
			info->domain_name = (const char **) exalloc(sizeof(const char *)*total);
		}

		for (d = node->d_type_array[n]; NULL != d; d = d->next)
		{
			/* One-character, NUL-terminated name. */
			char buff[] = {d->type, '\0'};

			info->domain_name[at] =
			      string_set_add (buff, postprocessor->string_set);
			at++;
		}
	}
}

Beispiel #12

0

Datei anzeigen

Datei: api.c Projekt: mclumd/Alfred

/*
 * Build a Sublinkage holding the union of the links of all sublinkages
 * of the given linkage.
 *
 * A link is included when link_already_appears() reports false for it.
 * Included links and their pp_info are deep-copied with excopy_link /
 * excopy_pp_info. The violation string is copied from the first
 * contributing sublinkage that has one. Only num_links, link, pp_info and
 * violation of the returned struct are set here.
 */
Sublinkage unionize_linkage(Linkage linkage) {
    Sublinkage u;
    int sub, idx;
    int unique_links = 0;
    int filled = 0;

    /* Pass 1: count the links that will make it into the union. */
    for (sub = 0; sub < linkage->num_sublinkages; ++sub) {
        for (idx = 0; idx < linkage->sublinkage[sub].num_links; ++idx) {
            if (link_already_appears(linkage, linkage->sublinkage[sub].link[idx], sub)) continue;
            unique_links++;
        }
    }

    u.num_links = unique_links;
    u.link = (Link *) exalloc(sizeof(Link)*unique_links);
    u.pp_info = (PP_info *) exalloc(sizeof(PP_info)*unique_links);
    u.violation = NULL;

    /* Pass 2: copy those links, in the same order they were counted. */
    for (sub = 0; sub < linkage->num_sublinkages; ++sub) {
        Sublinkage *src = &linkage->sublinkage[sub];

        for (idx = 0; idx < src->num_links; ++idx) {
            Link candidate = src->link[idx];

            if (link_already_appears(linkage, candidate, sub)) continue;

            u.link[filled] = excopy_link(candidate);
            u.pp_info[filled] = excopy_pp_info(src->pp_info[idx]);

            /* Keep only the first violation message encountered. */
            if ((NULL != src->violation) && (NULL == u.violation)) {
                u.violation = (char *) exalloc((strlen(src->violation)+1)*sizeof(char));
                strcpy(u.violation, src->violation);
            }
            filled++;
        }
    }

    return u;
}

Beispiel #13

0

Datei anzeigen

Datei: api.c Projekt: TatianaBatura/link-grammar

/*
 * Partial, but not full initialization of the linkage struct.
 *
 * Sets up an empty, zero-filled link array with room for 2 * N_words
 * links and a zero-filled chosen_disjuncts array of N_words entries, and
 * clears disjunct_list_str, pp_info (and sense_list under USE_CORPUS).
 *
 * The link array is obtained with calloc, so this function never writes
 * through the pointer itself: on allocation failure link_array is simply
 * NULL. The size computation is also overflow-checked by calloc.
 */
void partial_init_linkage(Linkage lkg, unsigned int N_words)
{
	lkg->num_links = 0;
	lkg->lasz = 2 * N_words;
	lkg->link_array = (Link *) calloc(lkg->lasz, sizeof(Link));

	lkg->num_words = N_words;
	lkg->cdsz =  N_words;
	lkg->chosen_disjuncts = (Disjunct **) exalloc(lkg->cdsz * sizeof(Disjunct *));
	memset(lkg->chosen_disjuncts, 0, N_words * sizeof(Disjunct *));

	lkg->disjunct_list_str = NULL;
#ifdef USE_CORPUS
	lkg->sense_list = NULL;
#endif

	lkg->pp_info = NULL;
}

Beispiel #14

0

Datei anzeigen

Datei: print-util.c Projekt: suriyadeepan/link-grammar

/*
 * Append printf-style formatted text to a growable String.
 *
 * Under _MSC_VER the format is first copied and every "%zu" is rewritten
 * to "%Iu". The text is then formatted into a TMPLEN-byte stack buffer.
 * vsnprintf returns the length the complete output needs, which may
 * exceed what fit in that buffer. The String is grown (if required) to
 * hold the complete output; when the stack buffer was too small, the text
 * is formatted a second time directly into the String so nothing is
 * truncated and string->eos always indexes the terminating NUL.
 *
 * If vsnprintf reports an error (negative return), the String is left
 * unchanged.
 */
void append_string(String * string, const char *fmt, ...)
{
#define TMPLEN 1024
	char temp_string[TMPLEN];
	int written;
	size_t templen;
	va_list args;
#ifdef _MSC_VER
	char * tmp = alloca(strlen(fmt)+1);
	char * tok = tmp;

	strcpy(tmp, fmt);
	while ((tok = strstr(tok, "%zu"))) { tok[1] = 'I'; tok++;}
	fmt = tmp;
#endif

	va_start(args, fmt);
	written = vsnprintf(temp_string, TMPLEN, fmt, args);
	va_end(args);

	/* A negative result must not be converted to a huge size_t. */
	if (written < 0) return;
	templen = (size_t) written;

	if (string->allocated <= string->eos + templen)
	{
		size_t new_size = 2 * string->allocated + templen + 1;
		char * p = (char *) exalloc(sizeof(char)*new_size);

		strcpy(p, string->p);
		exfree(string->p, sizeof(char)*string->allocated);

		string->p = p;
		string->allocated = new_size;
	}

	if (templen < TMPLEN)
	{
		/* The whole output fit in the stack buffer. */
		strcpy(string->p + string->eos, temp_string);
	}
	else
	{
		/* Output was truncated: format again, straight into the
		 * destination, which now has room for templen+1 bytes. */
		va_start(args, fmt);
		vsnprintf(string->p + string->eos, templen + 1, fmt, args);
		va_end(args);
	}
	string->eos += templen;
}

Beispiel #15

0

Datei anzeigen

Datei: api.c Projekt: mclumd/Alfred

/*
 * Append to the linkage a sublinkage that is the union of all existing
 * sublinkages, and make it the current one.
 *
 * Returns 0 if the linkage was already unionized (current is then set to
 * the last sublinkage), and 1 otherwise. When there is exactly one
 * sublinkage, the linkage is only flagged as unionized and nothing is
 * appended.
 */
int linkage_compute_union(Linkage linkage) {
    int idx;
    int old_count = linkage->num_sublinkages;
    Sublinkage * grown;
    Sublinkage * merged;

    if (linkage->unionized) {
        linkage->current = linkage->num_sublinkages-1;
        return 0;
    }
    if (1 == old_count) {
        linkage->unionized = TRUE;
        return 1;
    }

    /* Move the existing sublinkages into an array with one extra slot. */
    grown = (Sublinkage *) exalloc(sizeof(Sublinkage)*(old_count+1));
    for (idx = 0; idx < old_count; ++idx) {
        grown[idx] = linkage->sublinkage[idx];
    }
    exfree(linkage->sublinkage, sizeof(Sublinkage)*old_count);
    linkage->sublinkage = grown;

    /* num_sublinkages is still the old count while the union is built. */
    merged = &linkage->sublinkage[old_count];
    *merged = unionize_linkage(linkage);

    /* The domain data will not be needed for the union -- zero it out */
    merged->pp_data.N_domains = 0;
    merged->pp_data.length = 0;
    merged->pp_data.links_to_ignore = NULL;
    for (idx = 0; idx < MAX_SENTENCE; ++idx) {
        merged->pp_data.word_links[idx] = NULL;
    }

    linkage->num_sublinkages++;

    linkage->unionized = TRUE;
    linkage->current = linkage->num_sublinkages-1;
    return 1;
}

Beispiel #16

0

Datei anzeigen

Datei: constituents.c Projekt: arv100kri/linkparser

/**
 * Print out the constituent tree.
 * mode 1: treebank-style constituent tree
 * mode 2: flat, bracketed tree [A like [B this B] A]
 * mode 3: flat, treebank-style tree (A like (B this) )
 *
 * Returns NULL when mode is 0 or the dictionary has no constituent_pp.
 * For modes 1 and 3 the result is an exalloc'ed string; for mode 2 it is
 * whatever print_flat_constituents() returns. Any other mode trips an
 * assertion.
 */
char * linkage_print_constituent_tree(Linkage linkage, int mode)
{
	if ((0 == mode) || (NULL == linkage->sent->dict->constituent_pp))
	{
		return NULL;
	}

	switch (mode)
	{
		case 1:
		case 3:
		{
			String * out = String_create();
			CNode * tree = linkage_constituent_tree(linkage);
			char * result;

			print_tree(out, (mode==1), tree, 0, 0);
			linkage_free_constituent_tree(tree);
			append_string(out, "\n");

			/* Hand back an exact-size copy and release the String. */
			result = exalloc(strlen(out->p)+1);
			strcpy(result, out->p);
			exfree(out->p, sizeof(char)*out->allocated);
			exfree(out, sizeof(String));
			return result;
		}

		case 2:
		{
			con_context_t *ctxt = (con_context_t *) malloc(sizeof(con_context_t));
			char * flat = print_flat_constituents(ctxt, linkage);

			free(ctxt);
			return flat;
		}

		default:
			break;
	}

	assert(0, "Illegal mode in linkage_print_constituent_tree");
	return NULL;
}

Beispiel #17

0

Datei anzeigen

Datei: api.c Projekt: TatianaBatura/link-grammar

static Linkage linkage_array_new(int num_to_alloc)
{
	Linkage lkgs = (Linkage) exalloc(num_to_alloc * sizeof(struct Linkage_s));
	memset(lkgs, 0, num_to_alloc * sizeof(struct Linkage_s));
	return lkgs;
}

Beispiel #18

0

Datei anzeigen

Datei: print-util.c Projekt: suriyadeepan/link-grammar

/*
 * Return an exalloc'ed copy of the text held in s.
 * The buffer is sized as s->eos + 1 bytes.
 */
char * string_copy(String *s)
{
	char * dup;

	dup = (char *) exalloc(s->eos + 1);
	strcpy(dup, s->p);
	return dup;
}

Beispiel #19

0

Datei anzeigen

Datei: api.c Projekt: mclumd/Alfred

/*
 * Run the postprocessor over every sublinkage of the linkage and store
 * the results (per-link domain names, pp_data and violation message) in
 * each sublinkage.
 *
 * Results of an earlier run are released first. Post-processing is
 * skipped for a sublinkage when linkage->info.improper_fat_linkage is
 * set; in that case, or when post_process() returns NULL, the pp_info
 * entries stay empty. post_process_close_sentence() is called once at
 * the end.
 */
void linkage_post_process(Linkage linkage, Postprocessor * postprocessor) {
    int total = linkage_get_num_sublinkages(linkage);
    Parse_Options opts = linkage->opts;
    Sentence sent = linkage->sent;
    int s;

    for (s = 0; s < total; ++s) {
        Sublinkage * subl = &linkage->sublinkage[s];
        PP_node * pp = NULL;
        int lnk;

        /* Release whatever an earlier run left behind. */
        if (NULL != subl->pp_info) {
            for (lnk = 0; lnk < subl->num_links; ++lnk) {
                exfree_pp_info(subl->pp_info[lnk]);
            }
            post_process_free_data(&subl->pp_data);
            exfree(subl->pp_info, sizeof(PP_info)*subl->num_links);
        }

        /* Start from an empty pp_info array and no violation. */
        subl->pp_info = (PP_info *) exalloc(sizeof(PP_info)*subl->num_links);
        for (lnk = 0; lnk < subl->num_links; ++lnk) {
            subl->pp_info[lnk].num_domains = 0;
            subl->pp_info[lnk].domain_name = NULL;
        }
        if (NULL != subl->violation) {
            exfree(subl->violation, sizeof(char)*(strlen(subl->violation)+1));
            subl->violation = NULL;
        }

        if (!linkage->info.improper_fat_linkage) {
            /* This can return NULL, for example if there is no
               post-processor */
            pp = post_process(postprocessor, opts, sent, subl, FALSE);
        }

        if (NULL == pp) {
            for (lnk = 0; lnk < subl->num_links; ++lnk) {
                subl->pp_info[lnk].num_domains = 0;
                subl->pp_info[lnk].domain_name = NULL;
            }
            continue;
        }

        for (lnk = 0; lnk < subl->num_links; ++lnk) {
            PP_info * info = &subl->pp_info[lnk];
            D_type_list * d;
            int count = 0;
            int slot = 0;

            /* Count the domains first so the name array can be sized. */
            for (d = pp->d_type_array[lnk]; NULL != d; d = d->next) count++;

            info->num_domains = count;
            if (0 < count) {
                info->domain_name = (char **) exalloc(sizeof(char *)*count);
            }

            /* Each name is a single character plus the NUL. */
            for (d = pp->d_type_array[lnk]; NULL != d; d = d->next) {
                info->domain_name[slot] = (char *) exalloc(sizeof(char)*2);
                sprintf(info->domain_name[slot], "%c", d->type);
                slot++;
            }
        }

        subl->pp_data = postprocessor->pp_data;
        if (NULL != pp->violation) {
            subl->violation =
                (char *) exalloc(sizeof(char)*(strlen(pp->violation)+1));
            strcpy(subl->violation, pp->violation);
        }
    }
    post_process_close_sentence(postprocessor);
}

Beispiel #20

0

Datei anzeigen

Datei: constituents.c Projekt: arv100kri/linkparser

/*
 * Render the constituents recorded in ctxt->constituent[0..numcon_total)
 * as a flat bracketed string over the words of the linkage.
 *
 * For every word after the left wall: first emit an opening bracket for
 * each valid, non-aux constituent starting at that word (widest first),
 * then the word itself (except for the last word, the right wall), then a
 * closing bracket for each valid, non-aux constituent ending at that word
 * (innermost first).
 *
 * numcon_total must be below MAXCONSTITUENTS (asserted).
 * Returns an exalloc'ed, newline-terminated string.
 */
static char * exprint_constituent_structure(con_context_t *ctxt, Linkage linkage, int numcon_total)
{
	int c, w;
	int leftdone[MAXCONSTITUENTS];
	int rightdone[MAXCONSTITUENTS];
	int best, bestright, bestleft;
	Sentence sent;
	char s[100], * p;
	String * cs = String_create();

	assert (numcon_total < MAXCONSTITUENTS, "Too many constituents");
	sent = linkage_get_sentence(linkage);

	for(c=0; c<numcon_total; c++) {
		leftdone[c]=0;
		rightdone[c]=0;
	}

	if(verbosity>=2)
		printf("\n");

	for(w=1; w<linkage->num_words; w++) {
		/* Skip left wall; don't skip right wall, since it may
		   have constituent boundaries */

		/* Opening brackets: repeatedly pick the not-yet-emitted
		   constituent starting here that reaches furthest right. */
		while(1) {
			best = -1;
			bestright = -1;
			for(c=0; c<numcon_total; c++) {
				if ((ctxt->constituent[c].left==w) &&
					(leftdone[c]==0) && (ctxt->constituent[c].valid==1) &&
					(ctxt->constituent[c].right >= bestright)) {
					best = c;
					bestright = ctxt->constituent[c].right;
				}
			}
			if (best==-1)
				break;
			leftdone[best]=1;
			if(ctxt->constituent[best].aux==1) continue;
			append_string(cs, "%c%s ", OPEN_BRACKET, ctxt->constituent[best].type);
		}

		if (w<linkage->num_words-1) {
			/* Don't print out right wall */

			/* Bounded copy: never write past the end of s, whatever
			   the length of the word string. */
			snprintf(s, sizeof(s), "%s", sent->word[w].string);

			/* Now, if the first character of the word was
			   originally uppercase, we put it back that way */
			/* NOTE(review): the size passed here is MAX_WORD, not
			   sizeof(s) -- confirm MAX_WORD <= sizeof(s). */
			if (sent->word[w].firstupper ==1 )
				upcase_utf8_str(s, s, MAX_WORD);
			append_string(cs, "%s ", s);
		}

		/* Closing brackets: repeatedly pick the not-yet-emitted
		   constituent ending here that starts furthest right. */
		while(1) {
			best = -1;
			bestleft = -1;
			for(c=0; c<numcon_total; c++) {
				if ((ctxt->constituent[c].right==w) &&
					(rightdone[c]==0) && (ctxt->constituent[c].valid==1) &&
					(ctxt->constituent[c].left > bestleft)) {
					best = c;
					bestleft = ctxt->constituent[c].left;
				}
			}
			if (best==-1)
				break;
			rightdone[best]=1;
			if (ctxt->constituent[best].aux==1)
				continue;
			append_string(cs, "%s%c ", ctxt->constituent[best].type, CLOSE_BRACKET);
		}
	}

	append_string(cs, "\n");

	/* Return an exact-size copy and release the working String. */
	p = exalloc(strlen(cs->p)+1);
	strcpy(p, cs->p);
	exfree(cs->p, sizeof(char)*cs->allocated);
	exfree(cs, sizeof(String));
	return p;
}

Beispiel #21

0

Datei anzeigen

Datei: analyze-linkage.c Projekt: arv100kri/linkparser

/**
 * This procedure mimics analyze_fat_linkage in order to
 * extract the sublinkages and copy them to the Linkage
 * data structure passed in.
 *
 * On return linkage->num_sublinkages and linkage->sublinkage are set.
 * If building the DIS/CON tree flags a structure violation, a single
 * sublinkage holding copies of all links (fat links included) is stored.
 * Otherwise one sublinkage is stored per configuration produced by
 * advance_DIS(), each holding copies of the links whose `l` field is
 * not -1.
 *
 * NOTE(review): word_links, structure_violation, patch_array and
 * dfs_root_word are not declared in this function -- presumably
 * file-level state shared with the helpers called here; confirm.
 */
void extract_fat_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
{
	int i, j, N_thin_links;
	DIS_node *d_root;
	int num_sublinkages;
	Sublinkage * sublinkage;
	Parse_info pi = sent->parse_info;

	/* Scratch sublinkage; released with free_sublinkage() on every
	 * return path below. */
	sublinkage = x_create_sublinkage(pi);
	build_digraph(pi, word_links);
	structure_violation = FALSE;
	d_root = build_DIS_CON_tree(pi, word_links);

	if (structure_violation) {
		/* Copy every link unchanged into a single sublinkage. */
		compute_link_names(sent);
		for (i=0; i<pi->N_links; i++) {
			copy_full_link(&sublinkage->link[i],&(pi->link_array[i]));
		}

		linkage->num_sublinkages=1;
		linkage->sublinkage = ex_create_sublinkage(pi);

		/* This will have fat links! */
		for (i=0; i<pi->N_links; ++i) {
			linkage->sublinkage->link[i] = excopy_link(sublinkage->link[i]);
		}

		free_sublinkage(sublinkage);
		free_digraph(pi, word_links);
		free_DIS_tree(d_root);
		return;
	}

	/* first get number of sublinkages and allocate space */
	num_sublinkages = 0;
	for (;;) {
		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	linkage->num_sublinkages = num_sublinkages;
	linkage->sublinkage =
		(Sublinkage *) exalloc(sizeof(Sublinkage)*num_sublinkages);
	for (i=0; i<num_sublinkages; ++i) {
		linkage->sublinkage[i].link = NULL;
		linkage->sublinkage[i].pp_info = NULL;
		linkage->sublinkage[i].violation = NULL;
	}

	/* now fill out the sublinkage arrays */
	compute_link_names(sent);

	/* Second walk over the same advance_DIS() sequence; num_sublinkages
	 * is reused as the index of the sublinkage being filled. */
	num_sublinkages = 0;
	for (;;) {
		/* Reset the patch array and the scratch links to the parse's
		 * original links. */
		for (i=0; i<pi->N_links; i++) {
			patch_array[i].used = patch_array[i].changed = FALSE;
			patch_array[i].newl = pi->link_array[i].l;
			patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(d_root, NULL, word_links);

		/* Apply patched endpoints; a link left untouched whose two
		 * endpoints both have a dfs_root_word entry is marked with
		 * l == -1 so it is skipped below. */
		for (i=0; i<pi->N_links; i++) {
			if (patch_array[i].changed || patch_array[i].used) {
				sublinkage->link[i]->l = patch_array[i].newl;
				sublinkage->link[i]->r = patch_array[i].newr;
			} else if ((dfs_root_word[pi->link_array[i].l] != -1) &&
					   (dfs_root_word[pi->link_array[i].r] != -1)) {
				sublinkage->link[i]->l = -1;
			}
		}

		compute_pp_link_array_connectors(sent, sublinkage);
		compute_pp_link_names(sent, sublinkage);

		/* Don't copy the fat links into the linkage */
		N_thin_links = 0;
		for (i= 0; i<pi->N_links; ++i) {
			if (sublinkage->link[i]->l == -1) continue;
			N_thin_links++;
		}

		linkage->sublinkage[num_sublinkages].num_links = N_thin_links;
		linkage->sublinkage[num_sublinkages].link =
			(Link *) exalloc(sizeof(Link)*N_thin_links);
		linkage->sublinkage[num_sublinkages].pp_info = NULL;
		linkage->sublinkage[num_sublinkages].violation = NULL;

		/* Copy the surviving links, packing them densely (index j). */
		for (i=0, j=0; i<pi->N_links; ++i) {
			if (sublinkage->link[i]->l == -1) continue;
			linkage->sublinkage[num_sublinkages].link[j++] =
				excopy_link(sublinkage->link[i]);
		}


		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	free_sublinkage(sublinkage);
	free_digraph(pi, word_links);
	free_DIS_tree(d_root);
}

Beispiel #22

0

Datei anzeigen

Datei: linkage.c Projekt: hckiang/link-grammar

/**
 * Compute the words to display for a linkage.
 *
 * For every position of the linkage a display string is chosen:
 *  - positions whose chosen disjunct is NULL (null words) are collected
 *    into blocks; the block is rendered once, at its last position, and
 *    wrapped in NULLWORD_START / NULLWORD_END unless it is a wall;
 *  - positions with a disjunct use the disjunct's word string, with idiom
 *    subscripts stripped, morphemes optionally joined into one word (when
 *    HIDE_MORPHO is true), the subscript mark replaced by SUBSCRIPT_DOT
 *    and, where applicable, a "[<guess-mark><regex-name>]" suffix added.
 *
 * Afterwards linkage->word is (re)allocated with exalloc and filled with
 * the surviving strings, linkage->num_words is updated to their count,
 * the links are renumbered through remap_linkages(), and the matching
 * wordgraph path is stored in linkage->wg_path_display.
 *
 * All temporary buffers are taken from the stack with alloca()/strdupa(),
 * including inside the main loop, so they live until the function returns.
 *
 * NOTE(review): HIDE_MORPHO and DISPLAY_GUESS_MARKS are not defined in
 * this function; the otherwise-unused local display_morphology suggests
 * HIDE_MORPHO expands to an expression using it -- confirm.
 */
void compute_chosen_words(Sentence sent, Linkage linkage, Parse_Options opts)
{
	WordIdx i;   /* index of chosen_words */
	WordIdx j;
	Disjunct **cdjp = linkage->chosen_disjuncts;
	const char **chosen_words = alloca(linkage->num_words * sizeof(*chosen_words));
	int *remap = alloca(linkage->num_words * sizeof(*remap));
	bool *show_word = alloca(linkage->num_words * sizeof(*show_word));
	bool display_morphology = opts->display_morphology;

	Gword **lwg_path = linkage->wg_path;
	Gword **n_lwg_path = NULL; /* new Wordgraph path, to match chosen_words */

	Gword **nullblock_start = NULL; /* start of a null block, to be put in [] */
	size_t nbsize = 0;              /* number of word in a null block */
	Gword *sentence_word;

	memset(show_word, 0, linkage->num_words * sizeof(*show_word));

	if (verbosity_level(D_CCW))
		print_lwg_path(lwg_path, "Linkage");

	for (i = 0; i < linkage->num_words; i++)
	{
		Disjunct *cdj = cdjp[i];
		Gword *w;              /* current word */
		const Gword *nw;       /* next word (NULL if none) */
		Gword **wgp;           /* wordgraph_path traversing pointer */

		const char *t = NULL;  /* current word string */
		bool at_nullblock_end; /* current word is at end of a nullblock */
		bool join_alt = false; /* morpheme-join this alternative */
		char *s;
		size_t l;
		size_t m;

		lgdebug(D_CCW, "Loop start, word%zu: cdj %s, path %s\n",
		        i, cdj ? cdj->word_string : "NULL",
		        lwg_path[i] ? lwg_path[i]->subword : "NULL");

		w = lwg_path[i];
		/* NOTE(review): reads one element past index i; presumably
		 * lwg_path is NULL-terminated -- confirm. */
		nw = lwg_path[i+1];
		wgp = &lwg_path[i];
		sentence_word = wg_get_sentence_word(sent, w);

		/* FIXME If the original word was capitalized in a capitalizable
		 * position, the displayed null word may be its downcase version. */

		if (NULL == cdj) /* a null word (the chosen disjunct was NULL) */
		{
			chosen_words[i] = NULL;
			nbsize++;
			if (NULL == nullblock_start) /* it starts a new null block */
				nullblock_start = wgp;

			at_nullblock_end = (NULL == nw) ||
				(wg_get_sentence_word(sent, nw->unsplit_word) != sentence_word);

			/* Accumulate null words in this alternative */
			if (!at_nullblock_end && (NULL == cdjp[i+1]) &&
			    ((w->morpheme_type == MT_PUNC) == (nw->morpheme_type == MT_PUNC)))
			{
				lgdebug(D_CCW, "Skipping word%zu cdjp=NULL#%zu, path %s\n",
				        i, nbsize, w->subword);
				chosen_words[i] = NULL;
				continue;
			}

			if (NULL != nullblock_start)
			{
				/* If we are here, this null word is an end of a null block */
				lgdebug(+D_CCW, "Handling %zu null words at %zu: ", nbsize, i);

				if (1 == nbsize)
				{
					/* Case 1: A single null subword. */
					lgdebug(D_CCW, "A single null subword.\n");
					t = join_null_word(sent, wgp, nbsize);

					gwordlist_append(&n_lwg_path, w);
				}
				else
				{
					lgdebug(D_CCW, "Combining null subwords");
					/* Use alternative_id to check for start of alternative. */
					if (((*nullblock_start)->alternative_id == *nullblock_start)
					    && at_nullblock_end)
					{
						/* Case 2: A null unsplit_word (all-nulls alternative).*/
						lgdebug(D_CCW, " (null alternative)\n");
						t = sentence_word->subword;

						gwordlist_append(&n_lwg_path, sentence_word);
					}
					else
					{
						/* Case 3: Join together >=2 null morphemes. */
						Gword *wgnull;

						lgdebug(D_CCW, " (null partial word)\n");
						wgnull = wordgraph_null_join(sent, wgp-nbsize+1, wgp);
						gwordlist_append(&n_lwg_path, wgnull);
						t = wgnull->subword;
					}
				}

				/* The block has been rendered; start looking for the
				 * next one. */
				nullblock_start = NULL;
				nbsize = 0;
				show_word[i] = true;

				if (MT_WALL != w->morpheme_type)
				{
					/* Put brackets around the null word. */
					l = strlen(t) + 2;
					s = (char *) alloca(l+1);
					s[0] = NULLWORD_START;
					strcpy(&s[1], t);
					s[l-1] = NULLWORD_END;
					s[l] = '\0';
					t = string_set_add(s, sent->string_set);
					lgdebug(D_CCW, " %s\n", t);
					/* Null words have no links, so take care not to drop them. */
				}
			}
		}
		else
		{
			/* This word has a linkage. */

			/* TODO: Suppress "virtual-morphemes", currently the dictcap ones. */
			char *sm;

			t = cdj->word_string;
			/* Print the subscript, as in "dog.n" as opposed to "dog". */

			if (0)
			{
				/* TODO */
			}
			else
			{
				/* Get rid of those ugly ".Ixx" */
				if (is_idiom_word(t))
				{
					s = strdupa(t);
					sm = strrchr(s, SUBSCRIPT_MARK); /* Possible double subscript. */
					UNREACHABLE(NULL == sm); /* We know it has a subscript. */
					*sm = '\0';
					t = string_set_add(s, sent->string_set);
				}
				else if (HIDE_MORPHO)
				{
					/* Concatenate the word morphemes together into one word.
					 * Concatenate their subscripts into one subscript.
					 * Use subscript separator SUBSCRIPT_SEP.
					 * XXX Check whether we can encounter an idiom word here.
					 * FIXME Combining contracted words is not handled yet, because
					 * combining morphemes which have non-LL links to other words is
					 * not yet implemented.
					 * FIXME Move to a separate function. */
					Gword **wgaltp;
					size_t join_len = 0;
					size_t mcnt = 0;

					/* If the alternative contains morpheme subwords, mark it
					 * for joining... */

					const Gword *unsplit_word = w->unsplit_word;
					for (wgaltp = wgp, j = i; NULL != *wgaltp; wgaltp++, j++)
					{

						if ((*wgaltp)->unsplit_word != unsplit_word) break;
						if (MT_INFRASTRUCTURE ==
						    (*wgaltp)->unsplit_word->morpheme_type) break;

						mcnt++;

						if (NULL == cdjp[j])
						{
							/* ... but not if it contains a null word */
							join_alt = false;
							break;
						}
						join_len += strlen(cdjp[j]->word_string) + 1;
						if ((*wgaltp)->morpheme_type & IS_REG_MORPHEME)
							join_alt = true;
					}

					if (join_alt)
					{
						/* Join it in two steps: 1. Base words. 2. Subscripts.
						 * FIXME? Can be done in one step (more efficient but maybe
						 * less clear).
						 * Put SUBSCRIPT_SEP between the subscripts.
						 * XXX No 1-1 correspondence between the hidden base words
						 * and the subscripts after the join, in case there are base
						 * words with and without subscripts. */

						const char subscript_sep_str[] = { SUBSCRIPT_SEP, '\0'};
						/* NOTE(review): the calloc result is used without
						 * a NULL check. */
						char *join = calloc(join_len + 1, 1); /* zeroed out */

						join[0] = '\0';

						/* 1. Join base words. (Could just use the unsplit_word.) */
						for (wgaltp = wgp, m = 0; m < mcnt; wgaltp++, m++)
						{
							add_morpheme_unmarked(sent, join, cdjp[i+m]->word_string,
							                      (*wgaltp)->morpheme_type);
						}

						strcat(join, subscript_mark_str()); /* tentative */

						/* 2. Join subscripts. */
						for (wgaltp = wgp, m = 0; m < mcnt; wgaltp++, m++)
						{
							/* Cannot NULLify the word - we may have links to it. */
							if (m != mcnt-1) chosen_words[i+m] = "";

							sm =  strchr(cdjp[i+m]->word_string, SUBSCRIPT_MARK);

							if (NULL != sm)
							{
								/* Supposing stem subscript is .=x (x optional) */
								if (MT_STEM == (*wgaltp)->morpheme_type)
								{
									sm += 1 + STEM_MARK_L; /* sm+strlen(".=") */
									if ('\0' == *sm) sm = NULL;
#if 0
									if ((cnt-1) == m)
									{
										/* Support a prefix-stem combination. In that case
										 * we have just nullified the combined word, so we
										 * need to move it to the position of the prefix.
										 * FIXME: May still not be good enough. */
										move_combined_word = i+m-1;

										/* And the later chosen_word assignment should be:
										 * chosen_words[-1 != move_combined_word ?
										 *    move_combined_word : i] = t;
										 */
									}
									else
									{
										move_combined_word = -1;
									}
#endif
								}
							}
							if (NULL != sm)
							{
								strcat(join, sm+1);
								strcat(join, subscript_sep_str);
							}
						}

						/* Remove an extra mark, if any */
						join_len = strlen(join);
						if ((SUBSCRIPT_SEP == join[join_len-1]) ||
							 (SUBSCRIPT_MARK == join[join_len-1]))
							join[join_len-1] = '\0';

						gwordlist_append(&n_lwg_path, sentence_word);
						t = string_set_add(join, sent->string_set);
						free(join);

						/* Skip the morphemes just joined; the combined
						 * string is stored at the last of them. */
						i += mcnt-1;
					}
				}
			}

			if (!join_alt) gwordlist_append(&n_lwg_path, *wgp);

			/*
			 * Add guess marks in [] square brackets, if needed, at the
			 * end of the base word. Convert the badly-printing
			 * SUBSCRIPT_MARK (hex 03 or ^C) into a period.
			 */
			if (t)
			{

				s = strdupa(t);
				sm = strrchr(s, SUBSCRIPT_MARK);
				if (sm) *sm = SUBSCRIPT_DOT;

				if ((!(w->status & WS_GUESS) && (w->status & WS_INDICT))
				    || !DISPLAY_GUESS_MARKS)
				{
					t = string_set_add(s, sent->string_set);
				}
				else
				{
					const char *regex_name = w->regex_name;
					/* 4 = 1(null) + 1(guess_mark) + 2 (sizeof "[]") */
					int baselen = NULL == sm ? strlen(t) : (size_t)(sm-s);
					char guess_mark = 0;

					switch (w->status & WS_GUESS)
					{
						case WS_SPELL:
							guess_mark = GM_SPELL;
							break;
						case WS_RUNON:
							guess_mark = GM_RUNON;
							break;
						case WS_REGEX:
							guess_mark = GM_REGEX;
							break;
						case 0:
							guess_mark = GM_UNKNOWN;
							break;
						default:
							assert(0, "Missing 'case: %2x'", w->status & WS_GUESS);
					}

					/* In the case of display_morphology==0, the guess indication of
					 * the last subword is used as the guess indication of the whole
					 * word.
					 * FIXME? The guess indications of other subwords are ignored in
					 * this mode. This implies that if a first or middle subword has
					 * a guess indication but the last subword doesn't have, no guess
					 * indication would be shown at all. */

					if ((NULL == regex_name) || HIDE_MORPHO) regex_name = "";
					/* sm still points into the earlier strdupa() copy,
					 * which stays valid after s is re-pointed here. */
					s = alloca(strlen(t) + strlen(regex_name) + 4);
					strncpy(s, t, baselen);
					s[baselen] = '[';
					s[baselen + 1] = guess_mark;
					strcpy(s + baselen + 2, regex_name);
					strcat(s, "]");
					if (NULL != sm) strcat(s, sm);
					t = string_set_add(s, sent->string_set);
				}
			}
		}

		assert(t != NULL, "Word %zu: NULL", i);
		chosen_words[i] = t;
	}

	/* Conditional test removal of quotation marks and the "capdict" tokens,
	 * to facilitate using diff on sentence batch runs. */
	if (test_enabled("removeZZZ"))
	{
		for (i=0, j=0; i<linkage->num_links; i++)
		{
			Link *lnk = &(linkage->link_array[i]);

			if (0 == strcmp("ZZZ", lnk->link_name))
				chosen_words[lnk->rw] = NULL;
		}
	}

	/* If morphology printing is being suppressed, then all links
	 * connecting morphemes will be discarded. */
	if (HIDE_MORPHO)
	{
		/* Discard morphology links. */
		for (i=0; i<linkage->num_links; i++)
		{
			Link * lnk = &linkage->link_array[i];

			if (is_morphology_link(lnk->link_name))
			{
				/* Mark link for discarding. */
				lnk->link_name = NULL;
			}
			else
			{
				/* Mark word for not discarding. */
				show_word[lnk->rw] = true;
				show_word[lnk->lw] = true;
			}
		}
	}

	/* We alloc a little more than needed, but so what... */
	linkage->word = (const char **) exalloc(linkage->num_words*sizeof(char *));

	/* Copy over the chosen words, dropping the discarded words.
	 * However, don't discard existing words (chosen_words[i][0]).
	 * Note that if a word only has morphology links and is not combined with
	 * another word, then it will get displayed with no links at all (e.g.
	 * when explicitly specifying root and suffix for debug: root.= =suf */
	for (i=0, j=0; i<linkage->num_words; ++i)
	{
		if (chosen_words[i] &&
		    (chosen_words[i][0] || (!HIDE_MORPHO || show_word[i])))
		{
			/* NOTE(review): linkage->word was just allocated above and
			 * slot j has not been written yet, so cwtmp receives an
			 * indeterminate value here -- confirm this swap is intended. */
			const char *cwtmp = linkage->word[j];
			linkage->word[j] = chosen_words[i];
			chosen_words[i] = cwtmp;
			remap[i] = j;
			j++;
		}
		else
		{
			/* -1 marks a position that is not displayed. */
			remap[i] = -1;
		}
	}
	linkage->num_words = j;

	remap_linkages(linkage, remap); /* Update linkage->link_array / num_links. */

	linkage->wg_path_display = n_lwg_path;

	if (verbosity_level(D_CCW))
		print_lwg_path(n_lwg_path, "Display");
}