/**
 * Fetch the next line of fp that contains at least one word and split it.
 *
 * Lines starting with FSG_MODEL_COMMENT_CHAR and lines for which
 * str2words() counts zero words are skipped.  *lineno is incremented for
 * every line read, including skipped ones.
 *
 * @param fp      stream handed to fread_line()
 * @param lineno  in/out line counter
 * @param lineptr in/out line buffer; the previous buffer is released with
 *                ckd_free() before each read and replaced by the one
 *                returned from fread_line()
 * @param wordptr in/out word-pointer array; allocated when *wordptr is
 *                NULL, resized otherwise
 * @return whatever str2words() returns for the accepted line, or -1 once
 *         fread_line() returns NULL
 */
static int32
nextline_str2words(FILE * fp, int32 * lineno,
                   char **lineptr, char ***wordptr)
{
    size_t nbytes;
    int32 nwords;

    do {
        /* Release the previous line before asking for a fresh one. */
        ckd_free(*lineptr);
        *lineptr = fread_line(fp, &nbytes);
        if (*lineptr == NULL)
            return -1;

        ++(*lineno);

        /* A comment line is treated exactly like a line with no words. */
        if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
            nwords = 0;
        else
            nwords = str2words(*lineptr, NULL, 0);
    } while (nwords == 0);

    /* Size the pointer array for this line (not meant to be fast). */
    if (*wordptr != NULL)
        *wordptr = ckd_realloc(*wordptr, nwords * sizeof(**wordptr));
    else
        *wordptr = ckd_calloc(nwords, sizeof(**wordptr));

    return str2words(*lineptr, *wordptr, nwords);
}
Beispiel #2
0
/**
 * Add a word with the given pronunciation to the decoder.
 *
 * The phone string is split into words, each one is mapped to a phone ID
 * through bin_mdef_ciphone_id(), and the result is added to ps->dict and
 * ps->d2p.  When ps_get_lmset() yields a model set, the word is also added
 * to it.
 *
 * @param ps     decoder whose dictionary, dict2pid and LM set are updated
 * @param word   word string to add
 * @param phones whitespace-separated phone names
 * @param update non-zero to call ps_search_reinit() afterwards
 * @return the dictionary word ID on success; -1 on an unknown phone, a
 *         dict_add_word() failure or an LM add failure; the negative
 *         value from ps_search_reinit() if the reinit fails
 */
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    ngram_model_t *lmset;
    s3cipid_t *pron;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
        ckd_free(pron);
        return -1;
    }
    /* No longer needed. */
    ckd_free(pron);

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    if ((lmset = ps_get_lmset(ps)) != NULL) {
        /* Add it to the LM set (meaning, the current LM).  In a perfect
         * world, this would result in the same WID, but because of the
         * weird way that word IDs are handled, it doesn't. */
        if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID)
            return -1;
    }

    /* Rebuild the widmap and search tree if requested. */
    if (update) {
        /* The assignment must be parenthesised on its own: otherwise rv
         * receives the 0/1 result of the comparison and a failure is
         * reported to the caller as 1 instead of the negative code. */
        if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p)) < 0)
            return rv;
    }
    return wid;
}
Beispiel #3
0
/**
 * Build an alignment from a line of "word:endtime" fields.
 *
 * The line is split in place with str2words(); each field is cut at its
 * ':' and the text after it is read with atof() as an end position.  The
 * duration passed to alignment_add_word() is the difference from the
 * previous end position multiplied by a fixed frame rate of 100.
 * Processing stops at the first field that has no ':'.
 *
 * @param line text to parse; modified in place
 * @param d2p  passed to alignment_init(); its dict is used for word lookup
 * @return the new alignment, or NULL if str2words() reports an error
 */
alignment_t *
parse_alignment(char *line, dict2pid_t *d2p)
{
    alignment_t *al;
    char **wptr;
    int nf, i;
    double spos;
    int32 frate = 100; /* FIXME */

    nf = str2words(line, NULL, 0);
    if (nf < 0)
        return NULL;
    wptr = ckd_calloc(nf, sizeof(*wptr));
    nf = str2words(line, wptr, nf);
    if (nf < 0) {
        ckd_free(wptr);
        return NULL;
    }
    al = alignment_init(d2p);
    spos = 0.0;
    for (i = 0; i < nf; ++i) {
        char *c = strchr(wptr[i], ':');
        double epos;
        int duration;
        if (c == NULL) /* word ID */
            break;
        *c++ = '\0';
        epos = atof(c);
        duration = (int) ((epos - spos) * frate);
        alignment_add_word(al, dict_wordid(d2p->dict, wptr[i]), duration);
        spos = epos;
    }
    /* The pointer array only indexes into line and is not referenced by
     * the alignment, so release it here instead of leaking it. */
    ckd_free(wptr);
    return al;
}
/**
 * Score a single string against a language model and print the results.
 *
 * A private copy of text is split into words, handed to calc_entropy(),
 * and the cross-entropy, perplexity, LM score, word count and OOV /
 * context-cue counts are printed to stdout.  Nothing is printed when the
 * text contains no words.
 *
 * @param lm    language model passed to calc_entropy()
 * @param lmath log-math object used for base conversion and exponentiation
 * @param text  text to evaluate; not modified
 */
static void
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
	char *textfoo;
	char **words;
	int32 n, ch, noovs, nccs, lscr;

	/* Split it into an array of strings. */
	textfoo = ckd_salloc(text);
	n = str2words(textfoo, NULL, 0);
	if (n < 0)
		E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
	if (n == 0) {
		/* Nothing to evaluate, but the copy still has to be released. */
		ckd_free(textfoo);
		return;
	}
	words = ckd_calloc(n, sizeof(*words));
	str2words(textfoo, words, n);

	ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

	printf("input: %s\n", text);
	printf("cross-entropy: %f bits\n",
	       ch * log(logmath_get_base(lmath)) / log(2));

	/* Calculate perplexity pplx = exp CH */
	printf("perplexity: %f\n", logmath_exp(lmath, ch));
	printf("lm score: %d\n", lscr);

	/* Report OOVs and CCs */
	printf("%d words evaluated\n", n);
	printf("%d OOVs, %d context cues removed\n",
	      noovs, nccs);

	ckd_free(textfoo);
	ckd_free(words);
}
/**
 * Score every line of a transcript file against a language model and
 * print corpus-level statistics.
 *
 * Each non-empty line is split into words; a final word of the form
 * "(...)" is treated as an utterance ID and dropped.  Per-line results
 * from calc_entropy() are accumulated, with the entropy kept in floating
 * point and converted to base 2.
 *
 * @param lm    language model passed to calc_entropy()
 * @param lmath log-math object used for the log-base conversion
 * @param lsnfn path of the transcript file; failure to open it is fatal
 */
static void
evaluate_file(ngram_model_t *lm, logmath_t *lmath, const char *lsnfn)
{
	FILE *fh;
	lineiter_t *litor;
	int32 nccs, noovs, nwords, lscr;
	float64 ch, log_to_log2;

	if ((fh = fopen(lsnfn, "r")) == NULL)
		E_FATAL_SYSTEM("failed to open transcript file %s", lsnfn);

	/* We have to keep ch in floating-point to avoid overflows, so
	 * we might as well use log2. */
	log_to_log2 = log(logmath_get_base(lmath)) / log(2);
	lscr = nccs = noovs = nwords = 0;
	ch = 0.0;
	for (litor = lineiter_start(fh); litor; litor = lineiter_next(litor)) {
		char **words;
		int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;

		n = str2words(litor->buf, NULL, 0);
		if (n < 0)
			E_FATAL("str2words(line, NULL, 0) = %d, should not happen\n", n);
		if (n == 0) /* Do nothing! */
			continue;
		words = ckd_calloc(n, sizeof(*words));
		str2words(litor->buf, words, n);

		/* Remove any utterance ID (FIXME: has to be a single "word") */
		if (words[n-1][0] == '('
		    && words[n-1][strlen(words[n-1])-1] == ')')
			n = n - 1;

		tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
				      &tmp_noovs, &tmp_lscr);

		/* Weight the per-line entropy by the number of scored words. */
		ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
		nccs += tmp_nccs;
		noovs += tmp_noovs;
		lscr += tmp_lscr;
		nwords += n;

		ckd_free(words);
	}

	/* The transcript is fully consumed; do not leak the stream. */
	fclose(fh);

	ch /= (nwords - nccs - noovs);
	printf("cross-entropy: %f bits\n", ch);

	/* Calculate perplexity pplx = exp CH */
	printf("perplexity: %f\n", pow(2.0, ch));
	printf("lm score: %d\n", lscr);

	/* Report OOVs and CCs */
	printf("%d words evaluated\n", nwords);
	printf("%d OOVs (%.2f%%), %d context cues removed\n",
	       noovs, (double)noovs / nwords * 100, nccs);
}
Beispiel #6
0
/**
 * Try to open infile as a NIST-headered audio file.
 *
 * The first 7 bytes must equal "NIST_1A".  The stream is then rewound and
 * read line by line until an empty (trimmed) line is met.  Lines made of
 * exactly three words whose first word is "sample_rate", "channel_count"
 * or (when detect_endian is set) "sample_byte_format" update the
 * "-samprate", "-nchans" and "-input_endian" settings of wtf->config from
 * the third word.  Finally the stream is positioned at byte 1024.
 *
 * @param wtf           object whose config receives the parsed settings
 * @param infile        path of the file to open
 * @param out_fh        if non-NULL, receives the open stream; otherwise
 *                      the stream is closed before returning
 * @param detect_endian non-zero to honour "sample_byte_format"
 * @return TRUE if the file carries the NIST signature, FALSE if it does
 *         not, -1 if it cannot be opened or the signature cannot be read
 */
static int
open_nist_file(sphinx_wave2feat_t *wtf, char const *infile, FILE **out_fh, int detect_endian)
{
    char magic[7];
    lineiter_t *it;
    FILE *in;

    in = fopen(infile, "rb");
    if (in == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", infile);
        return -1;
    }
    if (fread(magic, 1, 7, in) != 7) {
        E_ERROR_SYSTEM("Failed to read NIST header");
        fclose(in);
        return -1;
    }
    /* Anything without the signature is simply "not a NIST file". */
    if (strncmp(magic, "NIST_1A", 7) != 0) {
        fclose(in);
        return FALSE;
    }

    /* Start over from the top and walk the header lines. */
    fseek(in, 0, SEEK_SET);
    it = lineiter_start(in);
    while (it != NULL) {
        int nfield;

        string_trim(it->buf, STRING_BOTH);
        if (it->buf[0] == '\0') {
            /* An empty line stops the scan; release the iterator by hand. */
            lineiter_free(it);
            break;
        }

        nfield = str2words(it->buf, NULL, 0);
        if (nfield == 3) {
            char **field = (char **)ckd_calloc(nfield, sizeof(*field));

            str2words(it->buf, field, nfield);
            /* The three keys are distinct strings, so at most one matches. */
            if (strcmp(field[0], "sample_rate") == 0) {
                cmd_ln_set_float32_r(wtf->config, "-samprate", atof_c(field[2]));
            }
            else if (strcmp(field[0], "channel_count") == 0) {
                cmd_ln_set_int32_r(wtf->config, "-nchans", atoi(field[2]));
            }
            else if (detect_endian && strcmp(field[0], "sample_byte_format") == 0) {
                cmd_ln_set_str_r(wtf->config, "-input_endian",
                                 (strcmp(field[2], "10") == 0) ? "big" : "little");
            }
            ckd_free(field);
        }

        it = lineiter_next(it);
    }

    /* Leave the stream positioned at byte offset 1024. */
    fseek(in, 1024, SEEK_SET);
    if (out_fh != NULL)
        *out_fh = in;
    else
        fclose(in);
    return TRUE;
}
/**
 * Build the state sequence for one utterance transcript.
 *
 * A private copy of trans is split into words, mk_phone_list() turns the
 * words into a phone list, cvt2triphone() rewrites that list in place and
 * state_seq_make() produces the state sequence.  All temporary buffers
 * are released before returning.
 *
 * @param n_state receives the state count from state_seq_make()
 * @param lex     lexicon passed to mk_phone_list()
 * @param inv     model inventory; its acmod_set is used for conversion
 * @param mdef    model definition passed to state_seq_make()
 * @param trans   word transcript; not modified
 * @return the state sequence, or NULL when mk_phone_list() fails
 */
state_t *next_utt_states(uint32 *n_state,
                         lexicon_t *lex,
                         model_inventory_t *inv,
                         model_def_t *mdef,
                         char *trans
                        )
{
    char **word;
    char *utterance;
    uint32 n_word;
    uint32 n_phone;
    char *btw_mark;
    acmod_set_t *acmod_set;
    acmod_id_t *phone;

    state_t *state_seq;

    /* str2words() splits in place, so work on a copy of the transcript. */
    utterance = ckd_salloc(trans);
    n_word = str2words(utterance, NULL, 0);
    word = ckd_calloc(n_word, sizeof(char*));
    str2words(utterance, word, n_word);

    phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);

    if (phone == NULL) {
        E_WARN("Unable to produce phonetic transcription for the utterance '%s'\n", trans);
        ckd_free(word);
        /* The transcript copy must be released on this path as well. */
        ckd_free(utterance);
        return NULL;
    }

    acmod_set = inv->acmod_set;

#ifdef NEXT_UTT_STATES_VERBOSE
    print_phone_list(phone, n_phone, btw_mark, acmod_set);
#endif

    cvt2triphone(acmod_set, phone, btw_mark, n_phone);

#ifdef NEXT_UTT_STATES_VERBOSE
    print_phone_list(phone, n_phone, btw_mark, acmod_set);
#endif

    state_seq = state_seq_make(n_state, phone, n_phone, inv, mdef);

#ifdef NEXT_UTT_STATES_VERBOSE
    state_seq_print(state_seq, *n_state, mdef);
#endif

    ckd_free(phone);
    ckd_free(btw_mark);
    ckd_free(word);
    ckd_free(utterance);

    return state_seq;
}
Beispiel #8
0
/*
 * Find the hypothesis for uttid in the input corpora and copy it to outfp.
 *
 * The corpora in inhyp[] are searched in order; the first hit is used.
 * The hypothesis is accepted only if its last word parses as an integer
 * equal to ef-sf+1.  Accepted hypotheses are written as "uttid hyp" and
 * flushed immediately; everything else is reported through E_ERROR.
 *
 * uttfile is not used by this function.
 */
static void process_utt (char *uttfile, int32 sf, int32 ef, char *uttid)
{
    int32 i, f, nwd;
    char *str;
    char tmp[65535], *wdp[4096];

    str = NULL;
    for (i = 0; i < n_inhyp; i++) {
        if ((str = corpus_lookup (inhyp[i], uttid)) != NULL)
            break;
    }
    if (i >= n_inhyp) {
        E_ERROR("%s: Missing\n", uttid);
        return;
    }

    /* tmp is a fixed-size scratch copy: refuse anything that would not
     * fit instead of letting strcpy() run past the end of the buffer. */
    if (strlen (str) >= sizeof(tmp)) {
        E_ERROR("%s: Hyp in %s too long (max %d chars); skipped\n",
                uttid, infilename[i], (int) (sizeof(tmp) - 1));
        return;
    }

    /* str2words() splits in place, so keep str intact for the output. */
    strcpy (tmp, str);
    if ((nwd = str2words (tmp, wdp, 4095)) < 0)
        E_FATAL("str2words failed\n");

    if ((nwd == 0) || (sscanf (wdp[nwd-1], "%d", &f) != 1) || (f != (ef-sf+1)))
        E_ERROR("%s: Bad hyp in %s: %s\n", uttid, infilename[i], str);
    else {
        fprintf (outfp, "%s %s\n", uttid, str);
        fflush (outfp);
        E_INFO("%s: Extracted from %s\n", uttid, infilename[i]);
    }
}
Beispiel #9
0
// Handle a botnet command addressed to the "raw" module.
//
// The first two words of cmd are split into arg[0] and arg[1].  Nothing
// happens unless arg[1] equals "raw" (case-insensitive).  The remainder of
// cmd (as returned by srewind(cmd, 2)) is then sent to IRC, provided the
// accumulated penalty is below 10; otherwise the sender named by arg[0]
// is told why.  `from` is not used here.
void Raw::onBotnetcmd(const char *from, const char *cmd)
{
    char arg[2][MAX_LEN];

    str2words(arg[0], cmd, 2, MAX_LEN, 0);

    // Not for us.
    if(strcasecmp(arg[1], "raw") != 0)
        return;

    char *text=srewind(cmd, 2);

    // No payload: show the usage line.
    if(!text || !*text)
    {
        net.sendOwner(arg[0], "[raw] Syntax: .bc ", (const char*) config.handle, " raw <text>", NULL);
        return;
    }

    // Throttle: refuse while the penalty counter is at 10 or above.
    if(penalty>=10)
    {
        net.sendOwner(arg[0], "[raw] Penalty is too high. Please wait a while and try again.", NULL);
        return;
    }

    net.irc.send(text, NULL);
    penalty+=calculatePenalty(text);
}
Beispiel #10
0
/**
 * Add a word to the dictionary using a pronunciation from dict_g2p().
 *
 * The phone string returned by dict_g2p() is copied, split into phone
 * names, and each name is mapped through bin_mdef_ciphone_id().  If all
 * names are known the word is added with dict_add_word().
 *
 * @param dict dictionary to extend; its g2p model and mdef are used
 * @param word word string to add
 * @return the value of dict_add_word() (word ID, or -1 on failure), -1 if
 *         a phone name is unknown, 0 if dict_g2p() returns NULL
 */
int
dict_add_g2p_word(dict_t * dict, char const *word)
{
    char *phones, *scratch;
    char **tokens;
    s3cipid_t *ids;
    int32 status;
    int n_phones, k;

    phones = dict_g2p(word, dict->ngram_g2p_model);
    if (phones == NULL)
        return 0;

    E_INFO("Adding phone %s for word %s \n",  phones, word);

    /* Split a private copy, since str2words() works in place. */
    scratch = ckd_salloc(phones);
    n_phones = str2words(scratch, NULL, 0);
    tokens = ckd_calloc(n_phones, sizeof(*tokens));
    str2words(scratch, tokens, n_phones);
    ids = ckd_calloc(n_phones, sizeof(*ids));

    /* Translate names to IDs; stop at the first unknown one. */
    status = 0;
    for (k = 0; k < n_phones; ++k) {
        ids[k] = bin_mdef_ciphone_id(dict->mdef, tokens[k]);
        if (ids[k] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    tokens[k], scratch);
            status = -1;
            break;
        }
    }

    /* The textual forms are not needed past this point. */
    ckd_free(tokens);
    ckd_free(scratch);
    ckd_free(phones);

    if (status == 0)
        status = dict_add_word(dict, word, ids, n_phones);

    ckd_free(ids);
    return status;
}
Beispiel #11
0
/*
 * Validation function for loading a hypseg corpus.
 *
 * Returns 0 for entries whose last word is "(null)" (these are excluded)
 * and 1 for everything else.  Entries that do not fit the scratch buffer,
 * or that str2words() cannot split, are fatal.
 */
static int32 validate (char *str)
{
    char tmp[65535], *wdp[4096];
    int32 nwd;

    /* str2words() splits in place, so work on a copy -- but only once we
     * know the copy fits, otherwise strcpy() would overrun tmp. */
    if (strlen (str) >= sizeof(tmp))
        E_FATAL("Hypothesis too long (max %d chars)\n", (int) (sizeof(tmp) - 1));
    strcpy (tmp, str);

    if ((nwd = str2words (tmp, wdp, 4095)) < 0)
        E_FATAL("str2words failed\n");
    if ((nwd > 0) && (strcmp (wdp[nwd-1], "(null)") == 0))
        return 0;	/* Exclude (null) hypotheses */
    return 1;
}
Beispiel #12
0
/*
 * Print the pronunciations of vocabulary entries read from stdin.
 *
 * Every command-line argument names a dictionary file.  Each input line
 * holds one or two words: the first is the name to print, the last is the
 * word to look up (with a single word, both are the same).  Dictionaries
 * are tried in order and the search stops at the first one that has the
 * word; all of its alternative pronunciations are printed, one per line.
 */
int
main (int32 argc, char *argv[])
{
    dict_t **d;
    int32 i, k, p, wid;
    char line[16384], *wp[1024];

    if (argc < 2) {
        E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
        exit(0);
    }
    d = (dict_t **) ckd_calloc (argc-1, sizeof(dict_t *));

    for (i = 1; i < argc; i++)
        d[i-1] = dict_init (NULL, argv[i], NULL, 0);

    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((k = str2words (line, wp, 1024)) < 0)
            E_FATAL("Line too long: %s\n", line);
        if (k > 2)
            E_FATAL("Vocab entry contains too many words\n");

        if (k == 0)
            continue;
        if (k == 1)
            wp[1] = wp[0];

        /* Look up word in each dictionary until found; from here on k
         * counts the pronunciations printed for this entry. */
        k = 0;
        for (i = 0; (i < argc-1) && (k == 0); i++) {
            wid = dict_wordid (d[i], wp[1]);
            if (NOT_WID(wid))
                continue;

            for (wid = dict_basewid(d[i], wid);
                 IS_WID(wid);
                 wid = dict_nextalt(d[i], wid)) {
                k++;
                if (k == 1)
                    printf ("%s\t", wp[0]);
                else
                    printf ("%s(%d)\t", wp[0], k);

                for (p = 0; p < dict_pronlen(d[i], wid); p++)
                    printf (" %s", dict_ciphone_str (d[i], wid, p));
                printf ("\n");
            }
        }
        if (k == 0)
            E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }

    /* Give the process a defined exit status. */
    return 0;
}
Beispiel #13
0
/*
 * Load dictionary entries from fp into d.
 *
 * Each line is "word phone phone ...".  Lines starting with '#' and empty
 * lines are skipped.  A word without a pronunciation, a pronunciation
 * containing an unknown phone, or a word rejected by dict_add_word() is
 * reported with E_ERROR and ignored.  A line with more words than the
 * pointer array can hold is fatal.
 *
 * Always returns 0.
 */
static int32 dict_read (FILE *fp, dict_t *d)
{
    char buf[16384];
    char **tok;
    s3cipid_t pron[4096];
    s3wid_t wid;
    int32 lineno, ntok, k, maxwd;

    maxwd = 4092;
    tok = (char **) ckd_calloc (maxwd, sizeof(char *));

    /* lineno is 1-based and advances for every line read, skipped or not. */
    for (lineno = 1; fgets (buf, sizeof(buf), fp) != NULL; lineno++) {
        if (buf[0] == '#')
            continue;       /* comment */

        ntok = str2words (buf, tok, maxwd);
        if (ntok < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", buf, maxwd);

        if (ntok == 0)
            continue;       /* empty */

        /* tok[0] is the word, tok[1..ntok-1] its pronunciation. */
        if (ntok == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n", lineno, tok[0]);
            continue;
        }

        /* Map every phone name to its ID, giving up on the first bad one. */
        for (k = 1; k < ntok; k++) {
            pron[k-1] = dict_ciphone_id (d, tok[k]);
            if (NOT_CIPID(pron[k-1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, tok[k], tok[0]);
                break;
            }
        }
        if (k != ntok)
            continue;       /* stopped early: skip this word */

        wid = dict_add_word (d, tok[0], pron, ntok-1);
        if (NOT_WID(wid))
            E_ERROR("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                    lineno, tok[0]);
    }

    ckd_free (tok);

    return 0;
}
Beispiel #14
0
/**
 * Map the given ngram string to an array of word IDs of the individual
 * words in the ngram.
 *
 * args:
 * ngram - the ngram string to map; split in place by str2words()
 * length - the maximum number of words to extract (used as the capacity
 *          handed to str2words(), capped at the size of the internal
 *          word-pointer array)
 * w - the word ID array; receives one ID per word
 * lm - the language model to use
 *
 * returns:
 * the number of words in the ngram string, or 0 if the string contains an
 * unknown word
 */
int
ngram2wid(char *ngram, int length, s3lmwid32_t * w, lm_t * lm)
{
    char *word[1024];
    int max_words;
    int nwd;
    int i;

    /* str2words() writes up to its capacity argument into word[], so the
     * caller's value must never exceed what word[] can actually hold. */
    max_words = (int) (sizeof(word) / sizeof(word[0]));
    if (length < max_words)
        max_words = length;

    if ((nwd = str2words(ngram, word, max_words)) < 0)
        E_FATAL("Increase word[] and w[] arrays size\n");

    for (i = 0; i < nwd; i++) {
        w[i] = lm_wid(lm, word[i]);
        if (NOT_LMWID(lm, w[i])) {
            E_ERROR("Unknown word: %s\n", word[i]);
            return 0;
        }
    }

    return nwd;
}
Beispiel #15
0
/*
 * Read lines from fp into line until one contains at least one word, and
 * split that line into wordptr.
 *
 * Lines starting with WORD_FSG_COMMENT_CHAR and lines without words are
 * skipped; *lineno is incremented for every line read.  A line with more
 * words than max_ptr allows is fatal.
 *
 * Returns the number of words on the accepted line, or -1 when fgets()
 * has nothing more to deliver.
 */
static int32
nextline_str2words(FILE * fp, int32 * lineno,
		   char *line, int32 max_line,
                   char **wordptr, int32 max_ptr)
{
    int32 nwords;

    while (fgets(line, max_line, fp) != NULL) {
        ++(*lineno);

        if (line[0] == WORD_FSG_COMMENT_CHAR)
            continue;           /* comment line */

        nwords = str2words(line, wordptr, max_ptr);
        if (nwords < 0)
            E_FATAL("Line[%d] too long\n", *lineno);

        if (nwords > 0)
            return nwords;      /* found a line with content */
        /* blank line: keep reading */
    }

    return -1;
}
Beispiel #16
0
/*
 * Reformat pronerralign output read from stdin into fixed-width columns.
 *
 * Each input line starts with a count, followed by a word list, a "=>"
 * separator and a phone context "(lc) p1 p2 (rc)" in which the first
 * phone may be wrapped in a "[[ ... ]]" error annotation.  Lines that are
 * too short for the expected layout are skipped silently.
 */
int
main (int32 argc, char *argv[])
{
    char line[16384], **wptr;
    int32 i, n, k, np;

    if (argc > 1) {
        E_INFO("Usage: %s < <result-of-pronerralign>\n", argv[0]);
        exit(0);
    }

    for (np = 0; phonestr[np]; np++);
    /* The %d conversion needs its argument. */
    E_INFO("%d phones\n", np);

    wptr = (char **) ckd_calloc (MAX_WORDS, sizeof(char *));

    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((n = str2words (line, wptr, MAX_WORDS)) < 0)
            E_FATAL("str2words(%s) failed; increase %d(?)\n", line, MAX_WORDS);

        /* Read first (count) field */
        if (n == 0) continue;
        if (sscanf (wptr[0], "%d", &k) != 1)
            E_FATAL("First field not a count: %s\n", wptr[0]);

        /* Find => separator after word list */
        for (i = 0; (i < n) && (strcmp (wptr[i], "=>") != 0); i++);
        i++;		/* Hopefully at (lc) */

        /* Must have at least: (lc) p1 p2 (rc) */
        if (n-i <= 3)
            continue;
        assert (i > 2);

        if ((strcmp (wptr[i+1], "[[") != 0) && (strcmp (wptr[i+2], "[[") != 0)) {
            /* No error */
            printf ("%6d %-5s %-5s %-5s          %s\n",
                    k, wptr[i], wptr[i+1], wptr[i+2], wptr[1]);
        } else if (strcmp (wptr[i+1], "[[") == 0) {
            /*
             * First phone got transformed.  Must be:
             *     (lc) [[ => ee ]] p2 (rc),
             *     (lc) [[ ee => ]] p2 (rc), or
             *     (lc) [[ pp => ee ]] p2 (rc)
             */
            if (n-i <= 6)
                continue;

            if ((strcmp (wptr[i+2], "=>") == 0) &&
                (strcmp (wptr[i+4], "]]") == 0) &&
                (strcmp (wptr[i+5], "[[") != 0)) {
                printf ("%6d %-5s %-5s %-5s => %-5s %s\n",
                        k, wptr[i], wptr[i+3], wptr[i+5], "--", wptr[1]);
            } else if ((strcmp (wptr[i+3], "=>") == 0) &&
                       (strcmp (wptr[i+4], "]]") == 0) &&
                       (strcmp (wptr[i+5], "[[") != 0)) {
                printf ("%6d %-5s %-5s %-5s => %-5s %s\n",
                        k, wptr[i], "--", wptr[i+5], wptr[i+2], wptr[1]);
            } else if ((strcmp (wptr[i+3], "=>") == 0) &&
                       (strcmp (wptr[i+5], "]]") == 0) &&
                       (strcmp (wptr[i+6], "[[") != 0) &&
                       (n-i > 7)) {
                printf ("%6d %-5s %-5s %-5s => %-5s %s\n",
                        k, wptr[i], wptr[i+4], wptr[i+6], wptr[i+2], wptr[1]);
            }
        }
    }

    ckd_free (wptr);
    return 0;
}
Beispiel #17
0
/*
 * Handle one line of text received from the hub connection.
 *
 * The line is split into up to 10 words stored in arg[] (via str2words).
 * While the hub is not yet STATUS_REGISTERED the words drive a small
 * handshake state machine keyed on net.hub.tmpint; once registered,
 * arg[0] is compared against the S_* command constants and the matching
 * action is carried out.  Lines that match none of the local commands are
 * passed to parse_botnet() and then to userlist.parse().
 *
 * Empty input is ignored.
 */
void parse_hub(char *data)
{
	char arg[10][MAX_LEN];
	chan *ch;

	if(!strlen(data)) return;
	str2words(arg[0], data, 10, MAX_LEN);

	/* Handshake: every successful step returns; anything else falls out
	 * of the switch and ends in "Access Denied". */
	if(!(net.hub.status & STATUS_REGISTERED))
	{
		switch(net.hub.tmpint)
		{
			/*
			case 0:
			{
				//3 bytes for WILL ECHO OFF + 1 byte for NEW LINE
				++net.hub.tmpint;
				//enable encryption
				net.hub.enableCrypt((unsigned char *) config.botnetword, strlen(config.botnetword));
				return;
			}
			*/
			/* Step 1: answer the hub's challenge in arg[0] with an MD5 hash
			 * and send our own freshly created auth string. */
			case 1:
			{
				if(strlen(arg[0]))
				{
					char hash[33];
					++net.hub.tmpint;
					//unsigned char *dupa = ((entMD5Hash *) &config.currentHub->getPass())->getHash();

					MD5HexHash(hash, arg[0], AUTHSTR_LEN, ((entMD5Hash *) &config.hub.getPass())->getHash(), 16);
					net.hub.send(config.handle, " ", hash, NULL);

					net.hub.tmpstr = (char *) malloc(AUTHSTR_LEN + 1);
					MD5CreateAuthString(net.hub.tmpstr, AUTHSTR_LEN);
					net.hub.send(net.hub.tmpstr, NULL);
                    return;
				}
				break;
			}
			/* Step 2: validate the hub's reply (arg[3]) against the auth
			 * string we sent, record the hub's handle and register. */
			case 2:
			{
				if(strlen(arg[3]))
				{
					if(MD5HexValidate(arg[3], net.hub.tmpstr, strlen(net.hub.tmpstr), ((entMD5Hash *) &config.hub.getPass())->getHash(), 16))
					{
						char buf[MAX_LEN];

						++net.hub.tmpint;
						userlist.addHandle(arg[0], 0, B_FLAGS | HAS_H | HAS_L, arg[1], arg[2], 0);
						net.hub.handle = userlist.findHandle(arg[0]);
						DEBUG(printf("[D] hub handle: %s\n", net.hub.handle->name));
						free(net.hub.tmpstr);
						net.hub.tmpstr = NULL;

						/* Slaves report 0 instead of their userlist serial number. */
						if(config.bottype != BOT_SLAVE)
							sprintf(buf, "%llu", userlist.SN);
						else
							strcpy(buf, "0");

						net.hub.send(S_REGISTER, " ", S_VERSION, " ", buf, " ", (const char *) ME.nick, " ", net.irc.origin, NULL);
						return;
					}
				}
				break;
			}
			/* Step 3: the hub confirms with S_REGISTER; mark the link as
			 * registered and announce it.  A mismatch falls through to
			 * default and is rejected below. */
			case 3:
			{
				if(!strcmp(arg[0], S_REGISTER))
				{
					mem_strcpy(net.hub.name, arg[1]);
					net.hub.tmpint = 0;
					net.hub.status |= STATUS_CONNECTED | STATUS_REGISTERED | STATUS_BOT;
					net.hub.killTime = NOW + set.CONN_TIMEOUT;
					net.hub.lastPing = NOW;

					net.hub.enableCrypt(((entMD5Hash *) &config.hub.getPass())->getHash(), 16);

					net.sendBotListTo(&net.hub);
					net.propagate(&net.hub, S_BJOIN, " ", net.hub.name, NULL);
					config.currentHub->failures = 0;
					net.propagate(NULL, S_CHNICK, " ", (const char *) ME.nick, " ", net.irc.origin, NULL);
					return;
				}
			}
			default: break;
		}
		/* HUH */
		/* NOTE(review): there is no return after close(), so execution
		 * continues into the registered-hub handling below -- confirm
		 * that this is intended. */
		net.hub.close("Access Denied");
	}

	/* REGISTERED HUB */
	net.hub.killTime = NOW + set.CONN_TIMEOUT;

	/* Userlist transfer: START allocates the buffer, END applies it, and
	 * every line in between is appended to the buffer verbatim. */
	if(!strcmp(arg[0], S_UL_UPLOAD_START))
	{
		if(userlist.ulbuf)
		{
			net.send(HAS_N, "[!] Double UL download, this should not happen", NULL);
			sleep(5);
			net.send(HAS_N, "[!] Terminating.", NULL);
			exit(1337);
		}
		userlist.ulbuf = new Pchar(64*1024);
		return;
	}
	if(!strcmp(arg[0], S_UL_UPLOAD_END))
	{
		if(!userlist.ulbuf)
		{
			net.send(HAS_N, "[!] Update userlist is empty", NULL);
			net.send(HAS_N, "[-] Disconnecting", NULL);
			net.hub.close("Userlist is empty");
			return;
		}
		
		userlist.update();
		if(userlist.me()->flags[GLOBAL] & HAS_P)
			hostNotify = 1;
		else
			hostNotify = 0;

		userlist.sendToAll();
		return;
	}
	if(userlist.ulbuf)
	{
		userlist.ulbuf->push(data);
		userlist.ulbuf->push("\n");
		return;
	}

	/* Channel cycle: part and schedule a rejoin if we are on the channel,
	 * then pass the command on. */
	if(!strcmp(arg[0], S_CYCLE) && strlen(arg[1]))
	{
		if(ME.findChannel(arg[1]))
		{
			net.irc.send("PART ", arg[1], " :", (const char *) config.cyclereason, NULL);
			ME.rejoin(arg[1], set.CYCLE_DELAY);

			if(strlen(arg[2]))
				net.send(HAS_N, "[*] Doing cycle on ", arg[1], NULL);

		}
		net.propagate(&net.hub, data, NULL);
		return;
	}
	/* Mass kicks (all / ops / non-ops); the second massKick argument is
	 * true when arg[3] is "close" or "lock". */
	if(!strcmp(arg[0], S_MKA) && strlen(arg[1]))
	{
		ch = ME.findChannel(arg[1]);
		if(ch) ch->massKick(MK_ALL, !strcmp(arg[3], "close") || !strcmp(arg[3], "lock"));
		return;
	}
	if(!strcmp(arg[0], S_MKO) && strlen(arg[1]))
	{
		ch = ME.findChannel(arg[1]);
		if(ch) ch->massKick(MK_OPS, !strcmp(arg[3], "close") || !strcmp(arg[3], "lock"));
		return;
	}
	if(!strcmp(arg[0], S_MKN) && strlen(arg[1]))
	{
		ch = ME.findChannel(arg[1]);
		if(ch) ch->massKick(MK_NONOPS, !strcmp(arg[3], "close") || !strcmp(arg[3], "lock"));
		return;
	}
	/* Forced unlink of a bot known to the userlist. */
	if(!strcmp(arg[0], S_UNLINK) && strlen(arg[1]))
	{
		HANDLE *h = userlist.findHandle(arg[1]);

		if(h && userlist.isBot(h))
		{
			inetconn *bot = net.findConn(h);
			if(bot) bot->close("Forced unlink");
		}
		return;
	}
	if(!strcmp(arg[0], S_NICK) && strlen(arg[1]))
	{
		net.irc.send("NICK ", arg[1], NULL);
		ME.nextNickCheck = NOW + set.KEEP_NICK_CHECK_DELAY;
		return;
	}
	/* Server jumps: arg[1] is passed last to ME.jump()/ME.jumps5(),
	 * arg[2] onwards carry the target parameters. */
	if(!strcmp(arg[0], S_JUMP) && strlen(arg[2]))
	{
		ME.jump(arg[2], arg[3], arg[1]);
		return;
	}
#ifdef HAVE_IPV6
	if(!strcmp(arg[0], S_JUMP6) && strlen(arg[2]))
	{
		ME.jump(arg[2], arg[3], arg[1], AF_INET6);
		return;
	}
#endif
	if(!strcmp(arg[0], S_JUMPS5) && strlen(arg[5]))
	{
		ME.jumps5(arg[2], atoi(arg[3]), arg[4], atoi(arg[5]), arg[1]);
		return;
	}

	/* Remote die.  NOTE(review): no return after safeExit(); presumably
	 * it does not come back -- confirm. */
	if(!strcmp(arg[0], S_RDIE) && strlen(arg[1]))
	{
		net.send(HAS_N, "[!] ", DIE_REASON, NULL);
		net.irc.send("QUIT :", arg[1], " ", DIE_REASON2, NULL);
		safeExit();
	}
	if(!strcmp(arg[0], S_NAMES) && strlen(arg[2]))
	{
		ch = ME.findChannel(arg[2]);
		if(ch)
			ch->names(arg[1]);
		else net.sendOwner(arg[1], "Invalid channel", NULL);
		return;
	}

	if(!strcmp(arg[0], S_CWHO) && strlen(arg[2]))
	{
		ch = ME.findChannel(arg[2]);
		if(ch)
			ch->cwho(arg[1], arg[3]);
		else net.sendOwner(arg[1], "Invalid channel", NULL);
		return;
	}
	if(!strcmp(arg[0], S_PSOTUPDATE))
	{
		psotget.forkAndGo(arg[1]);
		return;
	}
	if(!strcmp(arg[0], S_STOPUPDATE))
	{
		psotget.end();
		return;
	}
	if(!strcmp(arg[0], S_RESTART))
	{
		ME.restart();
		return;
	}
	if(!strcmp(arg[0], S_ULSAVE))
	{
		userlist.save(config.userlist_file);
		ME.nextRecheck = NOW + 5;
		net.propagate(&net.hub, data, NULL);
		return;
	}
	/* Remote join/part change the userlist, so its serial number is bumped. */
	if(!strcmp(arg[0], S_RJOIN) && strlen(arg[2]))
	{
		userlist.rjoin(arg[1], arg[2]);
		net.propagate(&net.hub, data, NULL);
		++userlist.SN;
		return;
	}
	if(!strcmp(arg[0], S_RPART) && strlen(arg[2]))
	{
		userlist.rpart(arg[1], arg[2], arg[3]);
		net.propagate(&net.hub, data, NULL);
		++userlist.SN;
		return;
	}
	if(!strcmp(arg[0], S_STATUS) && strlen(arg[1]))
	{
		ME.sendStatus(arg[1]);
		return;
	}
	if(!strcmp(arg[0], S_CHKHOST) && strlen(arg[1]))
	{
		ME.checkMyHost(arg[1]);
		return;
	}

	/* Not a hub-specific command: try the generic botnet parser first. */
	if(parse_botnet(&net.hub, data)) return;

	/* Finally treat the line as a userlist change and relay it. */
	if(userlist.parse(data))
	{
		++userlist.SN;

		//some things should not be propagated
		/* On a slave, sensitive fields are replaced by placeholders
		 * before the change is relayed. */
		if(config.bottype == BOT_SLAVE)
		{
			if(!strcmp(S_ADDBOT, arg[0]))
			{
				net.propagate(&net.hub, S_ADDBOT, " ", arg[1], " ", arg[2], " ", arg[3], " ", S_SECRET, NULL);
				return;
			}
			if(!strcmp(S_PASSWD, arg[0]) && userlist.isBot(arg[1]))
			{
				net.propagate(&net.hub, S_PASSWD, " ", arg[1], " ", "00000000000000000000000000000000", NULL);
				return;
			}
			if(!strcmp(S_ADDR, arg[0]) && userlist.isBot(arg[1]))
			{
				net.propagate(&net.hub, S_ADDR, " ", arg[1], " ", "0.0.0.0", NULL);
				return;
			}
			if(!strcmp(S_ADDOFFENCE, arg[0])) // leaf dont need infos about offence-history
				return;

		}
		net.propagate(&net.hub, data, NULL);
		return;
	}
}
Beispiel #18
0
/**
 * Test driver for the string helper functions.
 *
 * argv[1] selects the test: "string_join", "fread_line", "string_trim",
 * "str2words" or "nextword".  Returns 0 when the selected test passes (or
 * when the name is not recognised) and 1 on a failed check or when no
 * test name is given.
 */
int
main(int argc, char *argv[])
{
    if (argc < 2)
        return 1;

    if (!strcmp(argv[1], "string_join")) {
        char *foo = string_join("bar", "baz", "quux", NULL);
        if (strcmp(foo, "barbazquux") != 0) {
            printf("%s != barbazquux\n", foo);
            return 1;
        }
        foo = string_join("hello", NULL);
        if (strcmp(foo, "hello") != 0) {
            printf("%s != hello\n", foo);
            return 1;
        }
        return 0;
    }
    else if (!strcmp(argv[1], "fread_line")) {
        FILE *fp = fopen(TESTDATADIR "/_fread_line.txt", "r");
        char *line;
        size_t len;

        if (fp == NULL) {
            perror("Failed to open " TESTDATADIR "/_fread_line.txt");
            return 1;
        }
        line = fread_line(fp, &len);
        /* size_t values are cast so that they match the conversion. */
        printf("len = %lu orig = %lu\n", (unsigned long) len,
               (unsigned long) strlen("Hello world!\n"));
        if (strcmp(line, "Hello world!\n") != 0) {
            printf("'%s' != 'Hello world!\\n'\n", line);
            return 1;
        }
        ckd_free(line);
        line = fread_line(fp, &len);
        /* A line of exactly 127 characters. */
        printf("len = %lu orig = %lu\n", (unsigned long) len,
               (unsigned long) strlen("123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456\n"));
        if (strcmp(line, "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456\n") != 0) {
            printf("'%s' != '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456\\n'\n", line);
            return 1;
        }
        ckd_free(line);
        /* A very long line. */
        line = fread_line(fp, &len);
        printf("len = %lu orig = %lu\n", (unsigned long) len,
               (unsigned long) strlen("All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  \n"));
        if (strcmp(line, "All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  \n") != 0) {
            printf("'%s' != 'All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  All work and no play makes Jack a very dull boy.  \\n'\n", line);
            return 1;
        }
        ckd_free(line);
        /* At end of file fread_line() must report NULL. */
        line = fread_line(fp, &len);
        if (line != NULL) {
            printf("%p != NULL\n", line);
            return 1;
        }
        fclose(fp);
    }
    else if (!strcmp(argv[1], "string_trim")) {
        char *foo = ckd_salloc("\t foo bar baz  \n");
        string_trim(foo, STRING_BOTH);
        if (strcmp(foo, "foo bar baz") != 0) {
            printf("'%s' != 'foo bar baz'\n", foo);
            return 1;
        }
        /* Trimming an already trimmed string must not change it. */
        string_trim(foo, STRING_BOTH);
        if (strcmp(foo, "foo bar baz") != 0) {
            printf("'%s' != 'foo bar baz'\n", foo);
            return 1;
        }
        strcpy(foo, "foo\nbar\n\n");
        string_trim(foo, STRING_END);
        if (strcmp(foo, "foo\nbar") != 0) {
            printf("'%s' != 'foo\\nbar'\n", foo);
            return 1;
        }
        strcpy(foo, " \t \t foobar\n");
        string_trim(foo, STRING_START);
        if (strcmp(foo, "foobar\n") != 0) {
            printf("'%s' != 'foobar\\n'\n", foo);
            return 1;
        }
    }
    else if (!strcmp(argv[1], "str2words")) {
        char *line = ckd_salloc("    foo bar baz argh");
        char **words;
        int n;

        /* First pass only counts, second pass fills the array. */
        n = str2words(line, NULL, 0);
        if (n != 4) {
            printf("%d != 4\n", n);
            return 1;
        }
        words = ckd_calloc(n, sizeof(*words));
        n = str2words(line, words, n);
        if (n != 4) {
            printf("%d != 4\n", n);
            return 1;
        }
        if (strcmp(words[0], "foo") != 0
            || strcmp(words[1], "bar") != 0
            || strcmp(words[2], "baz") != 0
            || strcmp(words[3], "argh") != 0) {
            printf("%s, %s, %s, %s != foo, bar, baz, argh\n",
                   words[0], words[1], words[2], words[3]);
            return 1;
        }
        return 0;
    }
    else if (!strcmp(argv[1], "nextword")) {
        char *line = ckd_salloc(" \tfoo bar\nbaz argh");
        char *word;
        const char *delim = " \t\n";
        char delimfound;
        int n;

        /* After each call the delimiter is written back at word[n] and
         * scanning resumes from there. */
        n = nextword(line, delim, &word, &delimfound);
        if (strcmp(word, "foo") != 0) {
            printf("%s != foo\n", word);
            return 1;
        }
        if (delimfound != ' ') {
            printf("didn't find ' '\n");
            return 1;
        }
        word[n] = delimfound;
        line = word + n;
        n = nextword(line, delim, &word, &delimfound);
        if (strcmp(word, "bar") != 0) {
            printf("%s != bar\n", word);
            return 1;
        }
        if (delimfound != '\n') {
            printf("didn't find '\\n'\n");
            return 1;
        }
        word[n] = delimfound;
        line = word + n;
        n = nextword(line, delim, &word, &delimfound);
        if (strcmp(word, "baz") != 0) {
            printf("%s != baz\n", word);
            return 1;
        }
        if (delimfound != ' ') {
            printf("didn't find ' '\n");
            return 1;
        }
        word[n] = delimfound;
        line = word + n;
        n = nextword(line, delim, &word, &delimfound);
        if (strcmp(word, "argh") != 0) {
            printf("%s != argh\n", word);
            return 1;
        }
        if (delimfound != '\0') {
            printf("didn't find NUL\n");
            return 1;
        }
        word[n] = delimfound;
        line = word + n;
        n = nextword(line, delim, &word, &delimfound);
        /* NOTE(review): unlike the other checks this one only prints and
         * does not fail the test -- confirm whether that is intended. */
        if (n != -1) {
            printf("didn't get -1 at end of string\n");
        }

        line = ckd_salloc("FOO!");
        n = nextword(line, delim, &word, &delimfound);
        if (strcmp(word, "FOO!") != 0) {
            printf("%s != FOO!\n", word);
            return 1;
        }
        if (delimfound != '\0') {
            printf("didn't find NUL\n");
            return 1;
        }

        return 0;
    }
    return 0;
}
Beispiel #19
0
/**
 * Aggregate feature vectors per phone segment over the whole corpus.
 *
 * For every utterance delivered by corpus_next_utt() the word transcript
 * and the state segmentation are read, the transcript is turned into a
 * phone list, the list is checked against the segmentation, converted
 * with cvt2triphone() and segmented with mk_seg().  The features computed
 * from the utterance's MFCC data are then handed to segdmp_add_feat(),
 * one call per phone.  Utterances that fail one of the per-utterance
 * checks are reported and skipped; unreadable input is fatal.
 *
 * @param lex       lexicon passed to mk_phone_list()
 * @param acmod_set acoustic model set used by the checking/conversion steps
 * @param fcb       feature control block used to compute the features
 * @param type      not used by this function
 * @return always 0
 */
int
agg_phn_seg(lexicon_t *lex,
	    acmod_set_t *acmod_set,
	    feat_t *fcb,
	    segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt;

    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;

    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    uint32 n_stream;
    uint32 *veclen;

    tick_cnt = 0;

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    while (corpus_next_utt()) {
        if ((++tick_cnt % 500) == 0) {
            E_INFOCONT("[%u] ", tick_cnt);
        }

        if (corpus_get_sent(&trans) != S3_SUCCESS) {
            E_FATAL("Unable to read word transcript for %s\n", corpus_utt_brief_name());
        }

        if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
            E_FATAL("Unable to read Viterbi state segmentation for %s\n", corpus_utt_brief_name());
        }

        n_word = str2words(trans, NULL, 0);
        word = ckd_calloc(n_word, sizeof(char*));
        str2words(trans, word, n_word);

        phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
        if (phone == NULL) {
            /* Without a phone list nothing below can work; btw_mark and
             * n_phone are left alone since their state is unknown here. */
            E_ERROR("Unable to produce phonetic transcription for %s\n", corpus_utt());
            free(trans);	/* alloc'ed using strdup, not ckd_*() */
            free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
            ckd_free(word);
            continue;
        }
        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        /* check to see whether the word transcript and dictionary entries
           agree with the state segmentation */
        if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
            E_ERROR("ck_seg failed");
            goto next_utt;
        }

        if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
            E_ERROR("cvt2triphone failed");
            goto next_utt;
        }

        if (mk_seg(acmod_set,
                   seg,
                   n_frame,
                   phone,
                   start,
                   len,
                   n_phone) != S3_SUCCESS) {
            E_ERROR("mk_seg failed");
            goto next_utt;
        }

        if (corpus_provides_mfcc()) {
            if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
                E_FATAL("Can't read input features from %s\n", corpus_utt());
            }

            if (n_frame < 9) {
                E_WARN("utt %s too short\n", corpus_utt());
                if (mfcc) {
                    ckd_free(mfcc[0]);
                    ckd_free(mfcc);
                    mfcc = NULL;
                }
                goto next_utt;
            }

            feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
            feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

            /* One dump entry per phone, covering its frame span. */
            for (s = 0; s < n_phone; s++) {
                segdmp_add_feat(phone[s],
                                &feat[start[s]],
                                len[s]);
            }

            feat_array_free(feat);
            free(&mfcc[0][0]);
            ckd_free(mfcc);
        }
        else {
            E_FATAL("No data type specified\n");
        }

    next_utt:
        /* Single release point for everything allocated per utterance, so
         * that the skip paths free exactly what the normal path frees. */
        free(trans);	/* alloc'ed using strdup, not ckd_*() */
        free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
        ckd_free(word);
        ckd_free(phone);
        ckd_free(btw_mark);
        ckd_free(start);
        ckd_free(len);
    }

    return 0;
}
Beispiel #20
0
/*
 * Compute the flood penalty for one raw IRC line.
 *
 * Every line costs 1 point plus 1 point per 100 characters.  On top of
 * that a surcharge is added which depends on the command (first word of
 * the line) and, for some commands, on its arguments.
 *
 * data - the raw line; it is split into at most 10 words by str2words()
 *
 * Returns the total penalty for the line.
 */
int Raw::calculatePenalty(const char *data)
{
    char argv[10][MAX_LEN];
    char *name;
    char *save=NULL;
    const int maxpenalty=10;

    const int len=strlen(data);
    int mypenalty=(1+len/100);

    const int argc=str2words(argv[0], data, 10, MAX_LEN, 0);
    const char *cmd=argv[0];

    if(!strcasecmp(cmd, "MODE"))
    {
        // argv[1]: comma separated targets (channels and/or user)
        // argv[2]: optional modes, argv[3..]: optional parameters
        name=strtok_r(argv[1], ",", &save);
        while(name)
        {
            if(chan::isChannel(name))
                calculatePenaltyOfChanmode(argv[2], argc-3, &mypenalty);
            else
                calculatePenaltyOfUsermode(argv[2], &mypenalty);

            name=strtok_r(NULL, ",", &save);
        }
    }
    else if(!strcasecmp(cmd, "UMODE"))
    {
        // argv[1]: username to change mode for, argv[2]: modes to change
        calculatePenaltyOfUsermode(argv[2], &mypenalty);
    }
    else if(!strcasecmp(cmd, "KICK"))
    {
        // argv[1]: channel list, argv[2]: clients to kick, argv[3]: comment
        int user_cnt=0;

        // how many users are being kicked
        name=strtok_r(argv[2], ",", &save);
        while(name)
        {
            user_cnt++;
            name=strtok_r(NULL, ",", &save);
        }

        // every listed channel kicks every listed user; assume the worst
        // case and do not care whether the kick was successful
        name=strtok_r(argv[1], ",", &save);
        while(name)
        {
            mypenalty+=3*user_cnt;
            name=strtok_r(NULL, ",", &save);
        }

        /* alternative:
         *
         * mypenalty+=user_cnt;
         * TODO: we must increase the penalty (+2) if the kick was successful -> parse_irc()
         */
    }
    else if(!strcasecmp(cmd, "PRIVMSG") || !strcasecmp(cmd, "NOTICE"))
    {
        // argv[1]: receiver list, argv[2]: text -- one point per receiver
        name=strtok_r(argv[1], ",", &save);
        while(name)
        {
            mypenalty+=1;
            name=strtok_r(NULL, ",", &save);
        }
    }
    else if(!strcasecmp(cmd, "TOPIC"))
    {
        // argv[1]: channel list, argv[2]: topic
        mypenalty+=1;

        if(*argv[2])
        {
            // a topic is being set: two more points per channel
            name=strtok_r(argv[1], ",", &save);
            while(name)
            {
                mypenalty+=2;
                name=strtok_r(NULL, ",", &save);
            }
        }
    }
    else if(!strcasecmp(cmd, "AWAY"))
    {
        // argv[1]: away message; empty means "no longer away"
        mypenalty+=*argv[1] ? 2 : 1;
    }
    else if(!strcasecmp(cmd, "MOTD"))
    {
        // argv[1]: servername; non-empty means a remote MOTD
        mypenalty+=*argv[1] ? 5 : 2;
    }
    else if(!strcasecmp(cmd, "ADMIN"))
    {
        // argv[1]: servername; non-empty means a remote ADMIN
        mypenalty+=*argv[1] ? 3 : 2;
    }
    else if(!strcasecmp(cmd, "INFO"))
    {
        // argv[1]: servername; non-empty means a remote INFO
        mypenalty+=*argv[1] ? 10 : 5;
    }
    else if(!strcasecmp(cmd, "LINKS"))
    {
        /* argv[1]: servername mask
         * or:
         * argv[1]: server to query, argv[2]: servername mask
         */
        mypenalty+=(*argv[1] && *argv[2]) ? 5 : 2;
    }
    else if(!strcasecmp(cmd, "NAMES"))
    {
        // argv[1]: channel list, argv[2]: server to query
        if(*argv[2])
        {
            // NAMES is forwarded to another irc server
            mypenalty+=maxpenalty;
        }
        else if(*argv[1])
        {
            int chan_cnt=1;

            name=strtok_r(argv[1], ",", &save);
            while(name)
            {
                chan_cnt++;
                name=strtok_r(NULL, ",", &save);
            }

            chan_cnt=chan_cnt<2?2:(chan_cnt*ME.server.isupport.maxchannels)/10;
            mypenalty+=chan_cnt<2?2:chan_cnt;
        }
        else
        {
            mypenalty+=maxpenalty;
        }
    }
    else if(!strcasecmp(cmd, "LUSERS"))
    {
        // argv[1]: host/server mask, argv[2]: server to query
        mypenalty+=(*argv[1] && *argv[2]) ? 3 : 2;
    }
    else if(!strcasecmp(cmd, "USERS"))
    {
        // argv[1]: servername; non-empty means a remote USERS
        mypenalty+=*argv[1] ? 3 : 2;
    }
    else if(!strcasecmp(cmd, "WHO"))
    {
        // argv[1]: nickname mask or channel list
        // argv[2]: additional selection flag (like 'o')

        // FIXME: this can also be maxpenalty
        name=strtok_r(argv[1], ",", &save);
        while(name)
        {
            mypenalty+=1;
            name=strtok_r(NULL, ",", &save);
        }
    }
    else if(!strcasecmp(cmd, "WHOIS"))
    {
        // argv[1]: nickname masklist
        mypenalty+=2;
        // XXX: we must increase penalty (+1) if we got a whois reply from another server
    }
    else if(!strcasecmp(cmd, "WHOWAS"))
    {
        // argv[1]: nickname list, argv[2]: maximum replies,
        // argv[3]: server to query
        name=strtok_r(argv[1], ",", &save);
        while(name)
        {
            mypenalty+=*argv[3] ? 3 : 2;
            name=strtok_r(NULL, ",", &save);
        }
    }
    else if(!strcasecmp(cmd, "LIST"))
    {
        // argv[1]: channel list, argv[2]: server to query (remote LIST)
        mypenalty+=*argv[2] ? 10 : 2;
    }
    else
    {
        // commands whose surcharge does not depend on their arguments
        static const struct { const char *cmd; int cost; } flat[]=
        {
            { "STATS",    5 }, // maximum
            { "SQUERY",   2 },
            { "INVITE",   3 }, // maximum
            { "JOIN",     2 },
            { "PART",     4 },
            { "NICK",     3 },
            { "TRACE",    2 },
            { "VERSION",  2 },
            { "SERVLIST", 2 },
            { "MAP",      2 },
            { "TIME",     2 },
            { "HELP",     2 }
        };

        int extra=1; // everything else, e.g. PING, PONG, ISON

        for(unsigned int k=0; k<sizeof(flat)/sizeof(flat[0]); k++)
        {
            if(!strcasecmp(cmd, flat[k].cmd))
            {
                extra=flat[k].cost;
                break;
            }
        }

        mypenalty+=extra;
    }

    // FIXME: there is no #define DEBUG in config.h :-P

    DEBUG(printf("[D] Adding penalty %d\n", mypenalty));

    return mypenalty;
}
Beispiel #21
0
void SubOp::onPrivmsg(const char *from, const char *to, const char *msg)
{
  // Check if we match any of our keywords
  if (match("!kick *",msg) || match("!kban *",msg) || match("!quick *",msg) || match("!topic *",msg)) {
    char arg[50][MAX_LEN];      // arguments
    char user[MAX_LEN] = "";   //  the user to perform the action on
    char rest[MAX_LEN] = "";    // the rest of the line (minus the user)
    char whole[MAX_LEN] = "";  //  the whole line
    char nick[15] = "";         // local nickname
    char *pch;                 //  position holder
    chan *ch = ME.findChannel(to); // channel we are acting in
    // check if we have a channel
    if(ch) {
      //do i have op ?
      if(ch->me->flags & IS_OP) {
        // get the user who is performing the action
        chanuser *u = ch->getUser(from);
        // check if the user is valid, has the e flag and is voiced or oped in the channel currently
        if((u) && (u->flags & HAS_E) && (u->flags & IS_VOICE || u->flags & IS_OP)) {
          // break up the line
          str2words(arg[0], msg, 50, MAX_LEN, 0);
          // get the user
          strcpy(user,arg[1]);
          // loop through the line to concat it into one string again (minus the user)
          for (int i=2;i < 50;i++) { if (strlen(arg[i]) > 0) { strcat(rest,strcat(arg[i]," ")); } }
          // get the 'whole' line (the user to act on, a space and the rest of the line)
          strcat(whole,user);
          strcat(whole," ");
          strcat(whole,rest);
          // get just the nick from the nick!ident@host string
          pch=strchr(from,'!');
          strncat(nick,from,pch-from);
          // check if we are setting the topic
          if(match("!topic *",msg)) {
            // stick 'nick:' on the front of the topic string
            strcat(nick,":");
            strcat(nick,whole);
            // set the topic
            net.irc.send("TOPIC ", (const char *) ch->name, " :", nick, NULL);
          } else {
            // we arent setting the topic, so we are acting on another user, get that user
            chanuser *o = ch->getUser(user);
            // check if the person is trying to kick either myself or a permanent owner
            if (o && ((o == ch->me) || (o->flags & HAS_X)) && !(u->flags & HAS_X)) { 
              // kick the user for being naughty
              ch->kick(u,"Don't try it, f****r.");
            // check if we are trying to kick someone we shouldnt..
            } else if (o && (o != u) && (!(o->flags & (HAS_E | HAS_O | HAS_H | HAS_S | HAS_L)) || ((u->flags & HAS_X) && !(o->flags & HAS_X)))) {
              // check if we are trying to kickban
              if(match("!kban *",msg)) {
                // create the kick message
                strcat(nick,":");
                strcat(nick,rest);
                // kickban the user for 1200 seconds (20 mins) with the created reason
                ch->knockout(o,nick,1200);
              // check if we are kicking
              } else if(match("!kick *",msg)) {
                // kick the user with the reason
                ch->kick(o,rest);
              // check if we are quickbanning a user
              } else if(match("!quick *",msg)) {
                // create the kick message
                strcat(nick,":");
                strcat(nick,"Quickban.");
                // kickban the user for 10 seconds
                ch->knockout(o,nick,10);
              } // end of checks for kick/ban type
            } // end of check for kicking an invalid user
          } // end of check for setting topic or kick/banning
        } // end of check for a valid user sending the command
      } // end of check for is I have ops
    } // end of check for if we found a valid channel
  } // end of check for if we matched our text
} // end of function
Beispiel #22
0
/**
 * Advance the line iterator and parse the new line as one N-gram entry.
 *
 * The line is trimmed and split into whitespace-separated fields: field 0
 * is the log10 probability, fields 1..order are the words and an optional
 * field order+1 is the log10 backoff weight.
 *
 * On success raw_ngram->weights (one entry for the highest order, two
 * otherwise) and raw_ngram->words (stored in reverse order) are allocated
 * and filled.  If the file ends, or the line has fewer than order+1
 * fields, a message is logged and raw_ngram is left untouched.
 *
 * @param li        in/out line iterator; replaced by its successor
 * @param wid       hash table used to map word strings to ids
 * @param lmath     log-math object used for the log10 conversions
 * @param order     order of the N-gram being read
 * @param order_max highest order in the model (those carry no backoff)
 * @param raw_ngram entry to fill
 */
static void
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    char *fields[NGRAM_MAX_ORDER + 1];
    int n_fields;
    int k;

    *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return;
    }
    string_trim((*li)->buf, STRING_BOTH);

    n_fields = str2words((*li)->buf, fields, NGRAM_MAX_ORDER + 1);
    if (n_fields < order + 1) {
        /* Blank lines are skipped silently; anything else is reported. */
        if ((*li)->buf[0] != '\0') {
            E_WARN("Format error; %d-gram ignored: %s\n", order,
                   (*li)->buf);
        }
        return;
    }

    if (order == order_max) {
        /* Highest order: probability only. */
        float prob;

        raw_ngram->weights =
            (float *) ckd_calloc(1, sizeof(*raw_ngram->weights));
        prob = atof_c(fields[0]);
        if (prob > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, fields[1]);
            prob = 0.0f;
        }
        raw_ngram->weights[0] = logmath_log10_to_log_float(lmath, prob);
    }
    else {
        /* Lower orders: probability plus an optional backoff weight. */
        float weight;

        raw_ngram->weights =
            (float *) ckd_calloc(2, sizeof(*raw_ngram->weights));

        weight = atof_c(fields[0]);
        if (weight > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, fields[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        else {
            raw_ngram->weights[0] =
                logmath_log10_to_log_float(lmath, weight);
        }

        if (n_fields > order + 1) {
            float backoff = atof_c(fields[order + 1]);
            raw_ngram->weights[1] =
                logmath_log10_to_log_float(lmath, backoff);
        }
        else {
            raw_ngram->weights[1] = 0.0f;
        }
    }

    /* Field k (1-based) lands in slot order-k, i.e. the words are stored
     * back to front.  The lookup result is not checked. */
    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));
    for (k = 1; k <= order; k++) {
        hash_table_lookup_int32(wid, fields[k],
                                (int32 *) &raw_ngram->words[order - k]);
    }
}
Beispiel #23
0
/**
 * Read word-class definitions from a file into a hash table.
 *
 * A class starts with a line "LMCLASS <name>" and ends with a line
 * "END <name>".  Each line in between names one word, optionally followed
 * by its probability (1.0 when omitted).  Lines outside a class are
 * ignored.  Every completed class is entered into `classes` under its
 * name.
 *
 * @param classes   hash table receiving classname -> classdef_t entries
 * @param file_name file to read (opened with fopen_comp())
 * @return 0 on success; -1 if the file cannot be opened, an END marker
 *         does not match the open class, or the class cannot be entered
 *         into the hash table
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass = FALSE;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;
    char line[512];

    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    while (fgets(line, sizeof(line), fp) != NULL) {
        char *wptr[2];
        int n_words;

        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (!inclass) {
            /* Start a new LM class if the LMCLASS marker is seen;
             * otherwise just ignore whatever junk we got. */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            continue;
        }

        /* Look for an end of class marker. */
        if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
            classdef_t *classdef;
            gnode_t *word, *weight;
            int32 i;

            if (classname == NULL || 0 != strcmp(wptr[1], classname))
                goto error_out;
            inclass = FALSE;

            /* Construct a class from the list of words collected.  The
             * word strings are handed over to classdef, not copied. */
            classdef = ckd_calloc(1, sizeof(*classdef));
            classwords = glist_reverse(classwords);
            classprobs = glist_reverse(classprobs);
            classdef->n_words = glist_count(classwords);
            classdef->words = ckd_calloc(classdef->n_words,
                                         sizeof(*classdef->words));
            classdef->weights = ckd_calloc(classdef->n_words,
                                           sizeof(*classdef->weights));
            word = classwords;
            weight = classprobs;
            for (i = 0; i < classdef->n_words; ++i) {
                classdef->words[i] = gnode_ptr(word);
                classdef->weights[i] = gnode_float32(weight);
                word = gnode_next(word);
                weight = gnode_next(weight);
            }

            /* Add this class to the hash table. */
            if (hash_table_enter(classes, classname, classdef) !=
                classdef) {
                /* NOTE(review): classdef_free() is assumed to release the
                 * word strings that classdef now owns - confirm.  Drop the
                 * list nodes here so that the cleanup below does not free
                 * those same strings a second time. */
                classdef_free(classdef);
                glist_free(classwords);
                glist_free(classprobs);
                classwords = NULL;
                classprobs = NULL;
                goto error_out;
            }

            /* Reset everything.  classname is not freed here: its pointer
             * was passed to hash_table_enter() as the key. */
            glist_free(classwords);
            glist_free(classprobs);
            classwords = NULL;
            classprobs = NULL;
            classname = NULL;
        }
        else {
            float32 fprob;

            if (n_words == 2)
                fprob = atof_c(wptr[1]);
            else
                fprob = 1.0f;
            /* Add it to the list of words for this class. */
            classwords =
                glist_add_ptr(classwords, ckd_salloc(wptr[0]));
            classprobs = glist_add_float32(classprobs, fprob);
        }
    }
    rv = 0;                     /* Success. */

  error_out:
    /* Free all the stuff we might have allocated. */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
Beispiel #24
0
/*
 * Read pronunciation entries from an open dictionary file into d.
 *
 * Each non-comment line holds a word, an optional numeric probability and
 * the CI-phone names of its pronunciation.  Lines starting with '#' and
 * empty lines are skipped; malformed entries are reported and ignored.
 * When d->lts_rules is set, every non-filler phone of the model definition
 * must also appear in cmu6_lts_phone_table, otherwise the run is aborted.
 *
 * NOTE(review): the end of this function lies outside the visible text.
 */
static int32
dict_read(FILE * fp, dict_t * d)
{
    char line[16384], **wptr;
    s3cipid_t p[4096];
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;
    s3cipid_t ci;
    int32 ph;

    maxwd = 4092;
    wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */

    lineno = 0;
    while (fgets(line, sizeof(line), fp) != NULL) {
        char *fin;
        float proba;
        int deca;               /* 1 if wptr[1] is a probability, else 0 */

        lineno++;
        if (line[0] == '#')     /* Comment line */
            continue;

        if ((nwd = str2words(line, wptr, maxwd)) < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line,
                    maxwd);

        if (nwd == 0)           /* Empty line */
            continue;
        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
                    lineno, wptr[0]);
            continue;
        }

        /* If the second field parses as a number it is the probability of
         * the word and the phones start one field later. */
        proba = strtod(wptr[1], &fin);
        if (fin != wptr[1]) {
            deca = 1;
        }
        else {
            deca = 0;
            proba = 0.0;
        }

        /* A probability with nothing after it leaves no phones at all. */
        if (nwd - deca < 2) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
                    lineno, wptr[0]);
            continue;
        }

        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd - deca; i++) {
            p[i - 1] = dict_ciphone_id(d, wptr[i + deca]);
            if (NOT_S3CIPID(p[i - 1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i + deca], wptr[0]);
                break;
            }
        }

        if (i == nwd - deca) {  /* All CI-phones successfully converted to IDs */
            w = dict_add_word(d, wptr[0], p, nwd - 1 - deca);
            if (NOT_S3WID(w))
                E_ERROR
                    ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                     lineno, wptr[0]);
            else
                d->word[w].proba = proba;   /* only index with a valid id */
        }
    }


    if (d->lts_rules) {

#if 1                           /* Until we allow user to put in a mapping of the phoneset from LTS to the phoneset from mdef, 
                                   The checking will intrusively stop the recognizer.  */

        for (ci = 0; ci < mdef_n_ciphone(d->mdef); ci++) {

            if (!mdef_is_fillerphone(d->mdef, ci)) {
                for (ph = 0; cmu6_lts_phone_table[ph] != NULL; ph++) {

                    /*        E_INFO("%s %s\n",cmu6_lts_phone_table[ph],mdef_ciphone_str(d->mdef,ci)); */
                    if (!strcmp
                        (cmu6_lts_phone_table[ph],
                         mdef_ciphone_str(d->mdef, ci)))
                        break;
                }
                if (cmu6_lts_phone_table[ph] == NULL) {
                    /* One format string built from adjacent literals;
                     * separate arguments would be dropped. */
                    E_FATAL
                        ("A phone in the model definition doesn't appear in the letter to sound "
                         "rules. \n This is case we don't recommend user to "
                         "use the built-in LTS. \n Please kindly turn off "
                         "-lts_mismatch\n");
                }
            }
        }
#endif
    }
Beispiel #25
0
/**
 * Decode every utterance selected from the decoder's control file.
 *
 * Starts both search passes, then walks the control file line by line.
 * Lines before -ctloffset, lines that are not a multiple of -ctlincr away
 * from it, and lines past -ctlcount are not decoded.  When an alignment
 * file is present it is stepped in lock-step with the control file and the
 * matching line is parsed and passed to the decoder.  A control line has
 * the form "file [start-frame [end-frame [uttid]]]".
 *
 * @param bd batch decoder holding the configuration and open files
 * @return always 0
 */
int batch_decoder_run(batch_decoder_t *bd)
{
    int32 ctloffset, ctlcount, ctlincr;
    lineiter_t *li, *ali = NULL;

    search_run(bd->fwdtree);
    search_run(bd->fwdflat);

    ctloffset = cmd_ln_int32_r(bd->config, "-ctloffset");
    ctlcount = cmd_ln_int32_r(bd->config, "-ctlcount");
    ctlincr = cmd_ln_int32_r(bd->config, "-ctlincr");
    if (ctlincr == 0) {
        /* The stride is used as a modulus below. */
        E_ERROR("-ctlincr must not be 0; using 1\n");
        ctlincr = 1;
    }

    if (bd->alignfh)
        ali = lineiter_start(bd->alignfh);
    for (li = lineiter_start(bd->ctlfh); li; li = lineiter_next(li)) {
        alignment_t *al = NULL;
        char *wptr[4];
        int32 nf, sf, ef;

        /* Skipped control lines still consume one alignment line. */
        if (li->lineno < ctloffset
            || (li->lineno - ctloffset) % ctlincr != 0) {
            if (ali)
                ali = lineiter_next(ali);
            continue;
        }
        if (ctlcount != -1 && li->lineno >= ctloffset + ctlcount) {
            /* Leaving early: the iterator has to be released by hand. */
            lineiter_free(li);
            break;
        }
        if (ali)
            al = parse_alignment(ali->buf, search_factory_d2p(bd->sf));
        sf = 0;
        ef = -1;
        nf = str2words(li->buf, wptr, 4);
        if (nf == 0) {
            /* Do nothing. */
        }
        else if (nf < 0) {
            E_ERROR("Unexpected extra data in control file at line %d\n", li->lineno);
        }
        else {
            char *file, *uttid;

            file = wptr[0];
            uttid = NULL;
            if (nf > 1)
                sf = atoi(wptr[1]);
            if (nf > 2)
                ef = atoi(wptr[2]);
            if (nf > 3)
                uttid = wptr[3];
            /* Do actual decoding. */
            batch_decoder_decode(bd, file, uttid, sf, ef, al);
        }
        alignment_free(al);
        if (ali)
            ali = lineiter_next(ali);
    }
    /* The alignment iterator is still live if its file was not exhausted. */
    if (ali)
        lineiter_free(ali);
    featbuf_producer_shutdown(search_factory_featbuf(bd->sf));
    return 0;
}
Beispiel #26
0
/**
 * Add a word with the given pronunciation to the decoder.
 *
 * The phone string is split on whitespace and every phone is looked up in
 * the acoustic model definition.  The word is then added to the
 * dictionary and to dict2pid, and to the language model of every N-gram
 * search.  If `update` is non-zero each search is re-initialised as well.
 *
 * @param ps     decoder
 * @param word   word string to add
 * @param phones whitespace-separated phone names
 * @param update non-zero to re-initialise the searches
 * @return the new word's id, -1 on failure, or the negative value returned
 *         by ps_search_reinit() if re-initialising a search fails
 */
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    s3cipid_t *pron;
    hash_iter_t *search_it;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
        ckd_free(pron);
        return -1;
    }
    /* No longer needed. */
    ckd_free(pron);

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    /* TODO: we definitely need to refactor this */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        ps_search_t *search = hash_entry_val(search_it->ent);
        if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) {
            ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
            if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
                hash_table_iter_free(search_it);
                return -1;
            }
        }

        if (update) {
            /* Assign first, compare second: without the inner parentheses
             * rv would receive the result of the comparison (1). */
            if ((rv = ps_search_reinit(search, ps->dict, ps->d2p)) < 0) {
                hash_table_iter_free(search_it);
                return rv;
            }
        }
    }

    return wid;
}
Beispiel #27
0
/**
 * Decode all utterances listed in a control file and write the results.
 *
 * Optional side files (-mllrctl, -lmnamectl, -fsgctl) are read one line
 * per control line and select the MLLR transform, language model or FSG
 * for that utterance.  Results go to the optional -hyp, -hypseg and -ctm
 * files and to the -outlatdir / -nbestdir directories.  A control line has
 * the form "file [start-frame [end-frame [uttid]]]".
 *
 * Processing stops when a file cannot be opened or a side file has fewer
 * lines than the control file.  Every file opened here is closed before
 * returning; ctlfh is left open.
 *
 * @param ps     decoder
 * @param config configuration supplying the options above
 * @param ctlfh  open control file
 */
static void
process_ctl(ps_decoder_t *ps, cmd_ln_t *config, FILE *ctlfh)
{
    int32 ctloffset, ctlcount, ctlincr;
    int32 i;
    char *line;
    size_t len;
    FILE *hypfh = NULL, *hypsegfh = NULL, *ctmfh = NULL;
    FILE *mllrfh = NULL, *lmfh = NULL, *fsgfh = NULL;
    double n_speech, n_cpu, n_wall;
    char const *outlatdir;
    char const *nbestdir;
    char const *str;
    int frate;

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    ctlcount = cmd_ln_int32_r(config, "-ctlcount");
    ctlincr = cmd_ln_int32_r(config, "-ctlincr");
    outlatdir = cmd_ln_str_r(config, "-outlatdir");
    nbestdir = cmd_ln_str_r(config, "-nbestdir");
    frate = cmd_ln_int32_r(config, "-frate");

    if ((str = cmd_ln_str_r(config, "-mllrctl"))) {
        mllrfh = fopen(str, "r");
        if (mllrfh == NULL) {
            E_ERROR_SYSTEM("Failed to open MLLR control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-fsgctl"))) {
        fsgfh = fopen(str, "r");
        if (fsgfh == NULL) {
            E_ERROR_SYSTEM("Failed to open FSG control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-lmnamectl"))) {
        lmfh = fopen(str, "r");
        if (lmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open LM name control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-hyp"))) {
        hypfh = fopen(str, "w");
        if (hypfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-hypseg"))) {
        hypsegfh = fopen(str, "w");
        if (hypsegfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypsegfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-ctm"))) {
        ctmfh = fopen(str, "w");
        if (ctmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(ctmfh, NULL);
    }

    i = 0;
    while ((line = fread_line(ctlfh, &len))) {
        char *wptr[4];
        int32 nf, sf, ef;
        char *mllrline = NULL, *lmline = NULL, *fsgline = NULL;
        char *fsgfile = NULL, *lmname = NULL, *mllrfile = NULL;

        /* Read the matching line of every side file.  On a mismatch all
         * lines read so far for this utterance are released. */
        if (mllrfh) {
            mllrline = fread_line(mllrfh, &len);
            if (mllrline == NULL) {
                E_ERROR("File size mismatch between control and MLLR control\n");
                ckd_free(line);
                goto done;
            }
            mllrfile = string_trim(mllrline, STRING_BOTH);
        }
        if (lmfh) {
            lmline = fread_line(lmfh, &len);
            if (lmline == NULL) {
                E_ERROR("File size mismatch between control and LM control\n");
                ckd_free(mllrline);
                ckd_free(line);
                goto done;
            }
            lmname = string_trim(lmline, STRING_BOTH);
        }
        if (fsgfh) {
            fsgline = fread_line(fsgfh, &len);
            if (fsgline == NULL) {
                E_ERROR("File size mismatch between control and FSG control\n");
                ckd_free(mllrline);
                ckd_free(lmline);
                ckd_free(line);
                goto done;
            }
            fsgfile = string_trim(fsgline, STRING_BOTH);
        }

        if (i < ctloffset) {
            i += ctlincr;
            goto nextline;
        }
        if (ctlcount != -1 && i >= ctloffset + ctlcount) {
            goto nextline;
        }

        sf = 0;
        ef = -1;
        nf = str2words(line, wptr, 4);
        if (nf == 0) {
            /* Do nothing. */
        }
        else if (nf < 0) {
            E_ERROR("Unexpected extra data in control file at line %d\n", i);
        }
        else {
            char const *hyp, *file, *uttid;
            int32 score;

            file = wptr[0];
            uttid = NULL;
            if (nf > 1)
                sf = atoi(wptr[1]);
            if (nf > 2)
                ef = atoi(wptr[2]);
            if (nf > 3)
                uttid = wptr[3];

            E_INFO("Decoding '%s'\n", uttid ? uttid : file);

            /* Do actual decoding.  A failing step abandons this utterance;
             * jumping to nextline (rather than `continue`) still releases
             * the line buffers and, as before, leaves i unchanged. */
            if(process_mllrctl_line(ps, config, mllrfile) < 0)
                goto nextline;
            if(process_lmnamectl_line(ps, config, lmname) < 0)
                goto nextline;
            if(process_fsgctl_line(ps, config, fsgfile) < 0)
                goto nextline;
            if(process_ctl_line(ps, config, file, uttid, sf, ef) < 0)
                goto nextline;
            hyp = ps_get_hyp(ps, &score, &uttid);
            
            /* Write out results and such. */
            if (hypfh) {
                fprintf(hypfh, "%s (%s %d)\n", hyp ? hyp : "", uttid, score);
            }
            if (hypsegfh) {
                write_hypseg(hypsegfh, ps, uttid);
            }
            if (ctmfh) {
                ps_seg_t *itor = ps_seg_iter(ps, &score);
                write_ctm(ctmfh, ps, itor, uttid, frate);
            }
            if (outlatdir) {
                write_lattice(ps, outlatdir, uttid);
            }
            if (nbestdir) {
                write_nbest(ps, nbestdir, uttid);
            }
            ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
            E_INFO("%s: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
                   uttid, n_speech, n_cpu, n_wall);
            E_INFO("%s: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
                   uttid, n_cpu / n_speech, n_wall / n_speech);
            E_INFO_NOFN("%s (%s %d)\n", hyp ? hyp : "", uttid, score);
        }
        i += ctlincr;
    nextline:
        ckd_free(mllrline);
        ckd_free(fsgline);
        ckd_free(lmline);
        ckd_free(line);
    }

    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    E_INFO("TOTAL %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    E_INFO("AVERAGE %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

done:
    /* Close everything this function opened, inputs included. */
    if (mllrfh)
        fclose(mllrfh);
    if (lmfh)
        fclose(lmfh);
    if (fsgfh)
        fclose(fsgfh);
    if (hypfh)
        fclose(hypfh);
    if (hypsegfh)
        fclose(hypsegfh);
    if (ctmfh)
        fclose(ctmfh);
}
/**
 * Advance the line iterator and parse the new line as one N-gram entry.
 *
 * The line is split into whitespace-separated fields: field 0 is the
 * log10 probability, fields 1..order are the words and an optional field
 * order+1 is the log10 backoff weight.  The words are stored in
 * raw_ngram->words in reverse order; the result of the word lookup is not
 * checked.
 *
 * @param li        in/out line iterator; advanced when non-NULL
 * @param wid       hash table used to map word strings to ids
 * @param lmath     log-math object used for the log10 conversions
 * @param order     order of the N-gram being read
 * @param order_max highest order in the model (backoff is not set for it)
 * @param raw_ngram entry to fill
 * @return 0 on success, -1 if the input ended or the line has fewer than
 *         order+1 fields
 */
static int
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    char *fields[NGRAM_MAX_ORDER + 1];
    int n_fields;
    int k;

    if (*li)
        *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return -1;
    }

    n_fields = str2words((*li)->buf, fields, NGRAM_MAX_ORDER + 1);
    if (n_fields < order + 1) {
        E_ERROR("Format error; %d-gram ignored: %s\n", order, (*li)->buf);
        return -1;
    }

    raw_ngram->order = order;

    if (order == order_max) {
        /* Highest order: a positive value is clamped to 0 and then
         * converted like any other probability. */
        raw_ngram->prob = atof_c(fields[0]);
        if (raw_ngram->prob > 0) {
            E_WARN("%d-gram '%s' has positive probability\n", order, fields[1]);
            raw_ngram->prob = 0.0f;
        }
        raw_ngram->prob =
            logmath_log10_to_log_float(lmath, raw_ngram->prob);
    }
    else {
        /* Lower orders: a positive value is stored as 0 without going
         * through the conversion. */
        float weight = atof_c(fields[0]);

        if (weight > 0) {
            E_WARN("%d-gram '%s' has positive probability\n", order, fields[1]);
            raw_ngram->prob = 0.0f;
        }
        else {
            raw_ngram->prob =
                logmath_log10_to_log_float(lmath, weight);
        }

        if (n_fields > order + 1) {
            float backoff = atof_c(fields[order + 1]);
            raw_ngram->backoff =
                logmath_log10_to_log_float(lmath, backoff);
        }
        else {
            raw_ngram->backoff = 0.0f;
        }
    }

    /* Field k (1-based) lands in slot order-k. */
    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));
    for (k = 1; k <= order; k++) {
        hash_table_lookup_int32(wid, fields[k],
                                (int32 *) &raw_ngram->words[order - k]);
    }
    return 0;
}
Beispiel #29
0
/**
 * Read a pronunciation dictionary from fp and add its entries to d.
 *
 * Each non-comment line is split into whitespace-separated tokens: the
 * first token is the word, the remaining tokens are its CI phones.
 * Lines starting with "##" or ";;" are skipped as comments.  Lines with
 * no pronunciation, an unknown phone, or a word rejected by
 * dict_add_word() are reported and skipped.
 *
 * @param fp Open stream to read the dictionary from.
 * @param d  Dictionary that receives the entries.
 * @return Always 0; per-line problems are only logged.
 */
static int32
dict_read(FILE * fp, dict_t * d)
{
    lineiter_t *li;
    char **wptr;
    s3cipid_t *p;
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;
    size_t stralloc, phnalloc;

    /* Initial capacity of the token and phone-id buffers; grown on demand. */
    maxwd = 512;
    p = (s3cipid_t *) ckd_calloc(maxwd + 4, sizeof(*p));
    wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */

    lineno = 0;
    stralloc = phnalloc = 0;
    for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
        lineno++;
        if (0 == strncmp(li->buf, "##", 2)
            || 0 == strncmp(li->buf, ";;", 2))
            continue;

        if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
            /* Increase size of p, wptr. */
            nwd = str2words(li->buf, NULL, 0);
            assert(nwd > maxwd); /* why else would it fail? */
            maxwd = nwd;
            p = (s3cipid_t *) ckd_realloc(p, (maxwd + 4) * sizeof(*p));
            wptr = (char **) ckd_realloc(wptr, maxwd * sizeof(*wptr));

            /*
             * The counting call above was given no pointer array, and the
             * newly grown tail of wptr is uninitialized, so the line has
             * to be tokenized again into the enlarged buffer before any
             * wptr[i] is used.
             */
            nwd = str2words(li->buf, wptr, maxwd);
        }

        if (nwd == 0)           /* Empty line */
            continue;
        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
                    lineno, wptr[0]);
            continue;
        }


        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd; i++) {
            p[i - 1] = dict_ciphone_id(d, wptr[i]);
            if (NOT_S3CIPID(p[i - 1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i], wptr[0]);
                break;
            }
        }

        if (i == nwd) {         /* All CI-phones successfully converted to IDs */
            w = dict_add_word(d, wptr[0], p, nwd - 1);
            if (NOT_S3WID(w))
                E_ERROR
                    ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                     lineno, wptr[0]);
            else {
                /* Running totals, used only for the summary message below. */
                stralloc += strlen(d->word[w].word);
                phnalloc += d->word[w].pronlen * sizeof(s3cipid_t);
            }
        }
    }
    E_INFO("Allocated %d KiB for strings, %d KiB for phones\n",
           (int)stralloc / 1024, (int)phnalloc / 1024);
    ckd_free(p);
    ckd_free(wptr);

    return 0;
}
Beispiel #30
0
/**
 * Read the "\1-grams:" section of an ARPA language model.
 *
 * Skips forward to the section header, then reads `count` lines.  Each
 * line supplies a log10 probability, a word, and optionally a log10
 * backoff weight.  Probabilities and backoffs are converted with
 * base->lmath and stored in unigrams[]; the word strings are copied
 * into base->word_str[] and finally entered into base->wid under their
 * index.
 *
 * @param li       In/out line iterator positioned in the ARPA file.
 * @param count    Number of unigram lines to read.
 * @param base     Model supplying lmath, word_str and wid.
 * @param unigrams Output array with room for `count` entries.
 * @return 0 on success, -1 if the header or a unigram line is missing
 *         or malformed.
 */
static int
read_1grams_arpa(lineiter_t ** li, uint32 count, ngram_model_t * base,
                 unigram_t * unigrams)
{
    const int min_fields = 2;   /* probability + word */
    char *fields[3];
    uint32 idx;

    /* Scan forward until the section header is the current line. */
    while (*li != NULL) {
        if (strcmp((*li)->buf, "\\1-grams:") == 0)
            break;
        *li = lineiter_next(*li);
    }
    if (*li == NULL) {
        E_ERROR_SYSTEM("Failed to read \\1-grams: mark");
        return -1;
    }

    for (idx = 0; idx < count; ++idx) {
        unigram_t *entry;
        int n_fields;

        *li = lineiter_next(*li);
        if (*li == NULL) {
            E_ERROR
                ("Unexpected end of ARPA file. Failed to read %dth unigram\n",
                 idx + 1);
            return -1;
        }

        n_fields = str2words((*li)->buf, fields, 3);
        if (n_fields < min_fields) {
            E_ERROR("Format error at line %s, Failed to read unigrams\n", (*li)->buf);
            return -1;
        }

        entry = unigrams + idx;
        entry->prob =
            logmath_log10_to_log_float(base->lmath, atof_c(fields[0]));
        if (entry->prob > 0) {
            E_WARN("Unigram '%s' has positive probability\n", fields[1]);
            entry->prob = 0;
        }

        /* A third field, when present, is the backoff weight. */
        if (n_fields != min_fields + 1) {
            entry->bo = 0.0f;
        }
        else {
            entry->bo =
                logmath_log10_to_log_float(base->lmath,
                                           atof_c(fields[2]));
        }

        /* TODO: classify float with fpclassify and warn if bad value occurred */
        base->word_str[idx] = ckd_salloc(fields[1]);
    }

    /* Map every unigram string to its index; warn when the table hands
     * back a different value than the one just offered. */
    for (idx = 0; idx < count; ++idx) {
        void *wanted = (void *) (long) idx;
        void *stored =
            hash_table_enter(base->wid, base->word_str[idx], wanted);

        if (stored != wanted) {
            E_WARN("Duplicate word in dictionary: %s\n",
                   base->word_str[idx]);
        }
    }
    return 0;
}