/**
 * Read lines from fp until one is neither a comment nor blank, then split
 * it into words.
 *
 * The previous contents of *lineptr are released before each read, and
 * *wordptr is (re)allocated to hold exactly as many pointers as the line
 * has words.  *lineno is incremented once per line read.
 *
 * @return the result of str2words() on the accepted line, or -1 once
 *         fread_line() returns NULL.
 */
static int32
nextline_str2words(FILE * fp, int32 * lineno, char **lineptr, char ***wordptr)
{
    size_t nread;
    int32 nwords;

    do {
        ckd_free(*lineptr);
        *lineptr = fread_line(fp, &nread);
        if (*lineptr == NULL)
            return -1;
        ++*lineno;

        if (**lineptr == FSG_MODEL_COMMENT_CHAR) {
            /* Comment line: force another trip round the loop. */
            nwords = 0;
            continue;
        }
        /* Counting pass only; a zero count means a blank line. */
        nwords = str2words(*lineptr, NULL, 0);
    } while (nwords == 0);

    /* Abuse of realloc(), but this doesn't have to be fast. */
    *wordptr = (*wordptr == NULL)
        ? ckd_calloc(nwords, sizeof(**wordptr))
        : ckd_realloc(*wordptr, nwords * sizeof(**wordptr));

    return str2words(*lineptr, *wordptr, nwords);
}
/**
 * Add a word and its pronunciation to the decoder.
 *
 * The phone string is split on whitespace and each phone is mapped to a
 * CI phone ID.  The word is then added to the dictionary, to dict2pid and,
 * if an LM set is present, to that LM set.
 *
 * @param ps     Decoder to update.
 * @param word   Word string to add.
 * @param phones Whitespace-separated phone names.
 * @param update If non-zero, reinitialize the active search afterwards.
 * @return the dictionary word ID on success; -1 if a phone is unknown or
 *         the dictionary or LM set rejects the word; the negative value
 *         returned by ps_search_reinit() if reinitialization fails.
 */
int
ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
{
    int32 wid;
    ngram_model_t *lmset;
    s3cipid_t *pron;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    wid = dict_add_word(ps->dict, word, pron, np);
    /* No longer needed, whether or not the add succeeded. */
    ckd_free(pron);
    if (wid == -1)
        return -1;

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    if ((lmset = ps_get_lmset(ps)) != NULL) {
        /* Add it to the LM set (meaning, the current LM).  In a perfect
         * world, this would result in the same WID, but because of the
         * weird way that word IDs are handled, it doesn't. */
        if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID)
            return -1;
    }

    /* Rebuild the widmap and search tree if requested. */
    if (update) {
        /* The assignment must be parenthesized: "rv = f() < 0" stores the
         * comparison result (0 or 1) and would report failure as 1. */
        if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p)) < 0)
            return rv;
    }
    return wid;
}
/**
 * Build an alignment from a line of "word:endpos" fields.
 *
 * The line is split in place on whitespace.  Each field containing ':' is
 * cut at the colon; the text before it is looked up as a dictionary word
 * and the number after it is read as the end position.  The duration
 * added for each word is (end - previous end) * frate, truncated to int.
 * Parsing stops at the first field without a ':'.
 *
 * @param line Line to parse; modified in place.
 * @param d2p  dict2pid object supplying the dictionary.
 * @return a new alignment, or NULL if str2words() reports an error.
 */
alignment_t *
parse_alignment(char *line, dict2pid_t *d2p)
{
    alignment_t *al;
    char **wptr;
    int nf, i;
    double spos;
    int32 frate = 100; /* FIXME */

    nf = str2words(line, NULL, 0);
    if (nf < 0)
        return NULL;
    wptr = ckd_calloc(nf, sizeof(*wptr));
    nf = str2words(line, wptr, nf);
    if (nf < 0) {
        ckd_free(wptr);
        return NULL;
    }

    al = alignment_init(d2p);
    spos = 0.0;
    for (i = 0; i < nf; ++i) {
        char *c = strchr(wptr[i], ':');
        double epos;
        int duration;

        if (c == NULL) /* word ID */
            break;
        *c++ = '\0';
        epos = atof(c);
        duration = (int) ((epos - spos) * frate);
        alignment_add_word(al, dict_wordid(d2p->dict, wptr[i]), duration);
        spos = epos;
    }

    /* The pointer array is scratch space only (it points into line), so
     * release it on the success path as well. */
    ckd_free(wptr);
    return al;
}
/**
 * Evaluate a single sentence against the language model and print its
 * cross-entropy, perplexity, LM score, word count, OOV count and context
 * cue count to stdout.
 *
 * @param lm    Language model to score with.
 * @param lmath Log-math object whose base is used for the conversions.
 * @param text  Sentence to evaluate; it is copied, not modified.
 */
static void
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
    char *textfoo;
    char **words;
    int32 n, ch, noovs, nccs, lscr;

    /* Split it into an array of strings. */
    textfoo = ckd_salloc(text);
    n = str2words(textfoo, NULL, 0);
    if (n < 0)
        E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
    if (n == 0) {
        /* Nothing to evaluate, but the working copy must still go. */
        ckd_free(textfoo);
        return;
    }
    words = ckd_calloc(n, sizeof(*words));
    str2words(textfoo, words, n);

    ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

    printf("input: %s\n", text);
    printf("cross-entropy: %f bits\n",
           ch * log(logmath_get_base(lmath)) / log(2));

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", logmath_exp(lmath, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", n);
    printf("%d OOVs, %d context cues removed\n", noovs, nccs);

    ckd_free(textfoo);
    ckd_free(words);
}
/**
 * Evaluate every line of a transcript file against the language model and
 * print the aggregate cross-entropy, perplexity, LM score, word count,
 * OOV count and context cue count to stdout.
 *
 * A trailing field of the form "(...)" is treated as an utterance ID and
 * excluded from scoring.  Blank lines are skipped.
 *
 * @param lm    Language model to score with.
 * @param lmath Log-math object whose base is used for the log2 conversion.
 * @param lsnfn Path of the transcript file.
 */
static void
evaluate_file(ngram_model_t *lm, logmath_t *lmath, const char *lsnfn)
{
    FILE *fh;
    lineiter_t *litor;
    int32 nccs, noovs, nwords, lscr;
    float64 ch, log_to_log2;

    if ((fh = fopen(lsnfn, "r")) == NULL)
        E_FATAL_SYSTEM("failed to open transcript file %s", lsnfn);

    /* We have to keep ch in floating-point to avoid overflows, so
     * we might as well use log2. */
    log_to_log2 = log(logmath_get_base(lmath)) / log(2);
    lscr = nccs = noovs = nwords = 0;
    ch = 0.0;
    for (litor = lineiter_start(fh); litor; litor = lineiter_next(litor)) {
        char **words;
        int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;

        n = str2words(litor->buf, NULL, 0);
        if (n < 0)
            E_FATAL("str2words(line, NULL, 0) = %d, should not happen\n", n);
        if (n == 0) /* Do nothing! */
            continue;
        words = ckd_calloc(n, sizeof(*words));
        str2words(litor->buf, words, n);

        /* Remove any utterance ID (FIXME: has to be a single "word") */
        if (words[n-1][0] == '('
            && words[n-1][strlen(words[n-1])-1] == ')')
            n = n - 1;

        tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
                              &tmp_noovs, &tmp_lscr);

        /* Weight each line's entropy by the number of words scored. */
        ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
        nccs += tmp_nccs;
        noovs += tmp_noovs;
        lscr += tmp_lscr;
        nwords += n;

        ckd_free(words);
    }
    /* The transcript was opened here, so it is closed here. */
    fclose(fh);

    ch /= (nwords - nccs - noovs);
    printf("cross-entropy: %f bits\n", ch);

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", pow(2.0, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", nwords);
    printf("%d OOVs (%.2f%%), %d context cues removed\n",
           noovs, (double)noovs / nwords * 100, nccs);
}
/**
 * Try to open infile as a NIST-headered audio file.
 *
 * The file must start with the 7 bytes "NIST_1A".  The header is then read
 * line by line up to the first empty line; three-field lines whose first
 * field is "sample_rate", "channel_count" or (when detect_endian is set)
 * "sample_byte_format" update the -samprate, -nchans and -input_endian
 * settings in wtf->config.  Finally the stream is positioned at byte 1024.
 *
 * @param out_fh If non-NULL, receives the open stream; otherwise the
 *               stream is closed before returning.
 * @return TRUE if the file was accepted, FALSE if the magic does not
 *         match, -1 if the file cannot be opened or the magic cannot be
 *         read.
 */
static int
open_nist_file(sphinx_wave2feat_t *wtf, char const *infile, FILE **out_fh, int detect_endian)
{
    char magic[7];
    lineiter_t *it;
    FILE *in;

    in = fopen(infile, "rb");
    if (in == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", infile);
        return -1;
    }
    if (fread(&magic, 1, 7, in) != 7) {
        E_ERROR_SYSTEM("Failed to read NIST header");
        fclose(in);
        return -1;
    }
    /* Is this actually a NIST file? */
    if (strncmp(magic, "NIST_1A", 7) != 0) {
        fclose(in);
        return FALSE;
    }

    /* Rewind, parse lines. */
    fseek(in, 0, SEEK_SET);
    it = lineiter_start(in);
    while (it != NULL) {
        int nfield;

        string_trim(it->buf, STRING_BOTH);
        if (it->buf[0] == '\0') {
            /* An empty line ends the header fields. */
            lineiter_free(it);
            break;
        }

        nfield = str2words(it->buf, NULL, 0);
        if (nfield == 3) {
            char **field = (char **)ckd_calloc(nfield, sizeof(*field));

            str2words(it->buf, field, nfield);
            if (strcmp(field[0], "sample_rate") == 0) {
                cmd_ln_set_float32_r(wtf->config, "-samprate", atof_c(field[2]));
            }
            else if (strcmp(field[0], "channel_count") == 0) {
                cmd_ln_set_int32_r(wtf->config, "-nchans", atoi(field[2]));
            }
            else if (detect_endian
                     && strcmp(field[0], "sample_byte_format") == 0) {
                const char *order =
                    (strcmp(field[2], "10") == 0) ? "big" : "little";
                cmd_ln_set_str_r(wtf->config, "-input_endian", order);
            }
            ckd_free(field);
        }
        it = lineiter_next(it);
    }

    fseek(in, 1024, SEEK_SET);
    if (out_fh != NULL)
        *out_fh = in;
    else
        fclose(in);
    return TRUE;
}
/**
 * Build the state sequence for one utterance transcript.
 *
 * The transcript is copied and split into words, expanded to a phone list
 * through the lexicon, converted to triphones, and finally turned into a
 * state sequence.
 *
 * @param n_state Receives the state count, as set by state_seq_make().
 * @param lex     Lexicon used to expand words to phones.
 * @param inv     Model inventory; supplies the acoustic model set.
 * @param mdef    Model definition passed through to state_seq_make().
 * @param trans   Word transcript; it is copied, not modified.
 * @return the state sequence, or NULL if the transcript cannot be expanded
 *         to phones.
 */
state_t *next_utt_states(uint32 *n_state,
                         lexicon_t *lex,
                         model_inventory_t *inv,
                         model_def_t *mdef,
                         char *trans
                         )
{
    char **word;
    char *utterance;
    uint32 n_word;
    uint32 n_phone;
    char *btw_mark;
    acmod_set_t *acmod_set;
    acmod_id_t *phone;
    state_t *state_seq;

    utterance = ckd_salloc(trans);
    n_word = str2words(utterance, NULL, 0);
    word = ckd_calloc(n_word, sizeof(char*));
    str2words(utterance, word, n_word);

    phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
    if (phone == NULL) {
        E_WARN("Unable to produce phonetic transcription for the utterance '%s'\n", trans);
        ckd_free(word);
        /* The working copy must be released on this path too. */
        ckd_free(utterance);
        return NULL;
    }

    acmod_set = inv->acmod_set;

#ifdef NEXT_UTT_STATES_VERBOSE
    print_phone_list(phone, n_phone, btw_mark, acmod_set);
#endif

    cvt2triphone(acmod_set, phone, btw_mark, n_phone);

#ifdef NEXT_UTT_STATES_VERBOSE
    print_phone_list(phone, n_phone, btw_mark, acmod_set);
#endif

    state_seq = state_seq_make(n_state, phone, n_phone, inv, mdef);

#ifdef NEXT_UTT_STATES_VERBOSE
    state_seq_print(state_seq, *n_state, mdef);
#endif

    ckd_free(phone);
    ckd_free(btw_mark);
    ckd_free(word);
    ckd_free(utterance);

    return state_seq;
}
/*
 * Find the hypothesis for uttid in the loaded input corpora and, if its
 * last field is a frame count equal to (ef - sf + 1), write
 * "<uttid> <hyp>" to outfp.  A missing or malformed hypothesis is reported
 * with E_ERROR and nothing is written.
 *
 * uttfile is not used here.
 */
static void process_utt (char *uttfile, int32 sf, int32 ef, char *uttid)
{
    int32 i, f, nwd;
    char *str;
    char tmp[65535], *wdp[4096];

    /* Take the first input corpus that has an entry for this utterance. */
    for (i = 0; i < n_inhyp; i++) {
        if ((str = corpus_lookup (inhyp[i], uttid)) != NULL)
            break;
    }

    if (i >= n_inhyp) {
        E_ERROR("%s: Missing\n", uttid);
        return;
    }

    /* The hypothesis is split in a fixed scratch buffer, so refuse
     * anything that would not fit instead of overrunning the stack. */
    if (strlen (str) >= sizeof(tmp))
        E_FATAL("%s: Hypothesis too long (limit %d bytes)\n",
                uttid, (int) sizeof(tmp) - 1);
    strcpy (tmp, str);

    if ((nwd = str2words (tmp, wdp, 4095)) < 0)
        E_FATAL("str2words failed\n");

    /* The last field must be the frame count of the requested segment. */
    if ((nwd == 0) ||
        (sscanf (wdp[nwd-1], "%d", &f) != 1) ||
        (f != (ef-sf+1)))
        E_ERROR("%s: Bad hyp in %s: %s\n", uttid, infilename[i], str);
    else {
        fprintf (outfp, "%s %s\n", uttid, str);
        fflush (outfp);

        E_INFO("%s: Extracted from %s\n", uttid, infilename[i]);
    }
}
void Raw::onBotnetcmd(const char *from, const char *cmd) { char arg[2][MAX_LEN], *text; str2words(arg[0], cmd, 2, MAX_LEN, 0); if(!strcasecmp(arg[1], "raw")) { text=srewind(cmd, 2); if(text && *text) { if(penalty<10) { net.irc.send(text, NULL); penalty+=calculatePenalty(text); } else net.sendOwner(arg[0], "[raw] Penalty is too high. Please wait a while and try again.", NULL); } else net.sendOwner(arg[0], "[raw] Syntax: .bc ", (const char*) config.handle, " raw <text>", NULL); } }
/**
 * Generate a pronunciation for word with the dictionary's G2P model and
 * add the word to the dictionary.
 *
 * @param dict Dictionary to extend; supplies the G2P model and the mdef.
 * @param word Word to add.
 * @return the new word ID as returned by dict_add_word() (-1 on failure),
 *         -1 if the generated pronunciation contains an unknown phone, or
 *         0 if dict_g2p() produces no pronunciation.
 */
int
dict_add_g2p_word(dict_t * dict, char const *word)
{
    int32 new_wid;
    s3cipid_t *cipids;
    char **names;
    char *scratch;
    char *g2p_out;
    int n_phones;
    int k;

    g2p_out = dict_g2p(word, dict->ngram_g2p_model);
    if (g2p_out == NULL)
        return 0;

    E_INFO("Adding phone %s for word %s \n", g2p_out, word);

    /* Split a private copy of the phone string into phone names. */
    scratch = ckd_salloc(g2p_out);
    n_phones = str2words(scratch, NULL, 0);
    names = ckd_calloc(n_phones, sizeof(*names));
    str2words(scratch, names, n_phones);

    cipids = ckd_calloc(n_phones, sizeof(*cipids));
    for (k = 0; k < n_phones; ++k) {
        cipids[k] = bin_mdef_ciphone_id(dict->mdef, names[k]);
        if (cipids[k] != -1)
            continue;

        E_ERROR("Unknown phone %s in phone string %s\n", names[k], scratch);
        ckd_free(names);
        ckd_free(scratch);
        ckd_free(cipids);
        ckd_free(g2p_out);
        return -1;
    }

    /* The string forms are no longer needed. */
    ckd_free(names);
    ckd_free(scratch);
    ckd_free(g2p_out);

    /* dict_add_word() signals failure with -1, which is passed on as is. */
    new_wid = dict_add_word(dict, word, cipids, n_phones);
    ckd_free(cipids);
    return new_wid;
}
/*
 * Validation function for loading a hypseg corpus.
 *
 * Returns 0 for entries whose last whitespace-separated field is "(null)"
 * and 1 for everything else.  str itself is not modified; it is split in a
 * local scratch copy.
 */
static int32 validate (char *str)
{
    char tmp[65535], *wdp[4096];
    int32 nwd;

    /* Refuse input that would overrun the fixed scratch buffer. */
    if (strlen (str) >= sizeof(tmp))
        E_FATAL("Hypothesis too long (limit %d bytes)\n",
                (int) sizeof(tmp) - 1);
    strcpy (tmp, str);

    if ((nwd = str2words (tmp, wdp, 4095)) < 0)
        E_FATAL("str2words failed\n");

    if ((nwd > 0) && (strcmp (wdp[nwd-1], "(null)") == 0))
        return 0;   /* Exclude (null) hypotheses */

    return 1;
}
/*
 * Look up each vocabulary entry read from stdin in the dictionaries named
 * on the command line and print its pronunciations to stdout.
 *
 * A vocabulary line holds one or two words: the word to print and,
 * optionally, the word to look up (defaulting to the first).  Dictionaries
 * are tried in command-line order and the search stops at the first one
 * that yields a pronunciation.  The first pronunciation is printed under
 * the plain word and later ones as "word(N)".
 */
main (int32 argc, char *argv[])
{
    dict_t **dicts;
    int32 n_dict, di, n_field, n_pron, ph, wid;
    char line[16384], *field[1024];
    char *shown, *lookup;

    if (argc < 2) {
        E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
        exit(0);
    }

    n_dict = argc - 1;
    dicts = (dict_t **) ckd_calloc (n_dict, sizeof(dict_t *));
    for (di = 0; di < n_dict; di++)
        dicts[di] = dict_init (NULL, argv[di+1], NULL, 0);

    while (fgets (line, sizeof(line), stdin) != NULL) {
        n_field = str2words (line, field, 1024);
        if (n_field < 0)
            E_FATAL("Line too long: %s\n", line);
        if (n_field > 2)
            E_FATAL("Vocab entry contains too many words\n");
        if (n_field == 0)
            continue;

        shown = field[0];
        lookup = (n_field == 1) ? field[0] : field[1];

        /* Look up word in each dictionary until found */
        n_pron = 0;
        for (di = 0; (di < n_dict) && (n_pron == 0); di++) {
            wid = dict_wordid (dicts[di], lookup);
            if (NOT_WID(wid))
                continue;

            /* Walk every alternative pronunciation from the base word on. */
            wid = dict_basewid(dicts[di], wid);
            while (IS_WID(wid)) {
                n_pron++;
                if (n_pron == 1)
                    printf ("%s\t", shown);
                else
                    printf ("%s(%d)\t", shown, n_pron);

                for (ph = 0; ph < dict_pronlen(dicts[di], wid); ph++)
                    printf (" %s", dict_ciphone_str (dicts[di], wid, ph));
                printf ("\n");

                wid = dict_nextalt(dicts[di], wid);
            }
        }

        if (n_pron == 0)
            E_ERROR("No pronunciation for: '%s'\n", shown);
    }
}
/*
 * Read dictionary entries from fp into d.
 *
 * Each line that is neither blank nor starts with '#' holds a word
 * followed by its CI-phone names.  Lines without a pronunciation, with an
 * unknown phone, or rejected by dict_add_word() are reported with E_ERROR
 * and skipped.  Always returns 0.
 */
static int32 dict_read (FILE *fp, dict_t *d)
{
    char buf[16384];
    char **tok;
    s3cipid_t pron[4096];
    int32 ln, n_tok, k;
    int32 tok_max = 4092;
    s3wid_t wid;

    tok = (char **) ckd_calloc (tok_max, sizeof(char *));

    for (ln = 1; fgets (buf, sizeof(buf), fp) != NULL; ln++) {
        if (buf[0] == '#')  /* Comment line */
            continue;

        n_tok = str2words (buf, tok, tok_max);
        if (n_tok < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", buf, tok_max);

        if (n_tok == 0)     /* Empty line */
            continue;

        /* tok[0] is the word-string and tok[1..n_tok-1] the pronunciation sequence */
        if (n_tok == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n", ln, tok[0]);
            continue;
        }

        /* Convert pronunciation string to CI-phone-ids */
        for (k = 1; k < n_tok; k++) {
            pron[k-1] = dict_ciphone_id (d, tok[k]);
            if (NOT_CIPID(pron[k-1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        ln, tok[k], tok[0]);
                break;
            }
        }
        if (k < n_tok)      /* Stopped early on a bad phone */
            continue;

        /* All CI-phones successfully converted to IDs */
        wid = dict_add_word (d, tok[0], pron, n_tok-1);
        if (NOT_WID(wid))
            E_ERROR("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                    ln, tok[0]);
    }

    ckd_free (tok);

    return 0;
}
/**
 * Map the given ngram string to an array of word IDs of the individual
 * words in the ngram.  The string is split in place on whitespace.
 *
 * args:
 * ngram - the ngram string to map (modified in place)
 * length - caller-supplied limit handed to str2words() as the maximum
 *          number of words; it is clamped to the size of the internal
 *          pointer array
 * w - the word ID array; it receives one ID per word, so it needs room
 *     for as many words as the string can contain
 * lm - the language model to use
 *
 * returns:
 * the number of words in the ngram string, or 0 if the string contains an
 * unknown word
 */
int
ngram2wid(char *ngram, int length, s3lmwid32_t * w, lm_t * lm)
{
    enum { NGRAM2WID_MAX_WORDS = 1024 };
    char *word[NGRAM2WID_MAX_WORDS];
    int max_words;
    int nwd;
    int i;

    /* word[] has a fixed size: never tell str2words() that it may store
     * more pointers than the array can hold. */
    max_words = (length < NGRAM2WID_MAX_WORDS) ? length : NGRAM2WID_MAX_WORDS;

    if ((nwd = str2words(ngram, word, max_words)) < 0)
        E_FATAL("Increase word[] and w[] arrays size\n");

    for (i = 0; i < nwd; i++) {
        w[i] = lm_wid(lm, word[i]);
        if (NOT_LMWID(lm, w[i])) {
            E_ERROR("Unknown word: %s\n", word[i]);
            return 0;
        }
    }

    return nwd;
}
/**
 * Read lines from fp into line until one is neither a comment nor blank,
 * and split it into wordptr.
 *
 * *lineno is incremented once per line read.  A line with more than
 * max_ptr words is reported through E_FATAL.
 *
 * @return the number of words on the accepted line (always > 0), or -1
 *         once fgets() returns NULL.
 */
static int32
nextline_str2words(FILE * fp, int32 * lineno,
                   char *line, int32 max_line,
                   char **wordptr, int32 max_ptr)
{
    int32 count;

    while (fgets(line, max_line, fp) != NULL) {
        ++*lineno;

        /* Skip comment lines */
        if (line[0] == WORD_FSG_COMMENT_CHAR)
            continue;

        count = str2words(line, wordptr, max_ptr);
        if (count < 0)
            E_FATAL("Line[%d] too long\n", *lineno);

        /* Anything but a blank line ends the search. */
        if (count > 0)
            return count;
    }

    return -1;
}
/*
 * Read pronunciation-error alignment lines from stdin and print one
 * summary line for each phone context that is recognized.
 *
 * Each input line starts with a count, followed by a word list, a "=>"
 * separator and the phone context "(lc) p1 p2 (rc)", where a transformed
 * phone is bracketed as "[[ ... => ... ]]".  Only lines with no
 * transformation in p1/p2, or with a transformation of the first phone
 * alone, produce output.
 */
main (int32 argc, char *argv[])
{
    char line[16384], **wptr;
    int32 i, n, k, np;

    if (argc > 1) {
        E_INFO("Usage: %s < <result-of-pronerralign>\n", argv[0]);
        exit(0);
    }

    for (np = 0; phonestr[np]; np++);
    /* The count has to be passed; "%d" without an argument is undefined. */
    E_INFO("%d phones\n", np);

    wptr = (char **) ckd_calloc (MAX_WORDS, sizeof(char *));

    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((n = str2words (line, wptr, MAX_WORDS)) < 0)
            E_FATAL("str2words(%s) failed; increase %d(?)\n", line, MAX_WORDS);

        /* Read first (count) field */
        if (n == 0)
            continue;
        if (sscanf (wptr[0], "%d", &k) != 1)
            E_FATAL("First field not a count: %s\n", wptr[0]);

        /* Find => separator after word list */
        for (i = 0; (i < n) && (strcmp (wptr[i], "=>") != 0); i++);
        i++;    /* Hopefully at (lc) */

        /* Must have at least: (lc) p1 p2 (rc) */
        if (n-i <= 3)
            continue;
        assert (i > 2);

        if ((strcmp (wptr[i+1], "[[") != 0) && (strcmp (wptr[i+2], "[[") != 0)) {
            /* No error */
            printf ("%6d %-5s %-5s %-5s %s\n",
                    k, wptr[i], wptr[i+1], wptr[i+2], wptr[1]);
        } else if (strcmp (wptr[i+1], "[[") == 0) {
            /*
             * First phone got transformed.  Must be:
             *   (lc) [[ => ee ]] p2 (rc),
             *   (lc) [[ ee => ]] p2 (rc), or
             *   (lc) [[ pp => ee ]] p2 (rc)
             */
            if (n-i <= 6)
                continue;

            if ((strcmp (wptr[i+2], "=>") == 0) &&
                (strcmp (wptr[i+4], "]]") == 0) &&
                (strcmp (wptr[i+5], "[[") != 0)) {
                printf ("%6d %-5s %-5s %-5s => %-5s %s\n",
                        k, wptr[i], wptr[i+3], wptr[i+5], "--", wptr[1]);
            } else if ((strcmp (wptr[i+3], "=>") == 0) &&
                       (strcmp (wptr[i+4], "]]") == 0) &&
                       (strcmp (wptr[i+5], "[[") != 0)) {
                printf ("%6d %-5s %-5s %-5s => %-5s %s\n",
                        k, wptr[i], "--", wptr[i+5], wptr[i+2], wptr[1]);
            } else if ((strcmp (wptr[i+3], "=>") == 0) &&
                       (strcmp (wptr[i+5], "]]") == 0) &&
                       (strcmp (wptr[i+6], "[[") != 0) &&
                       (n-i > 7)) {
                printf ("%6d %-5s %-5s %-5s => %-5s %s\n",
                        k, wptr[i], wptr[i+4], wptr[i+6], wptr[i+2], wptr[1]);
            }
        }
    }
}
void parse_hub(char *data) { char arg[10][MAX_LEN]; chan *ch; if(!strlen(data)) return; str2words(arg[0], data, 10, MAX_LEN); if(!(net.hub.status & STATUS_REGISTERED)) { switch(net.hub.tmpint) { /* case 0: { //3 bytes for WILL ECHO OFF + 1 byte for NEW LINE ++net.hub.tmpint; //enable encryption net.hub.enableCrypt((unsigned char *) config.botnetword, strlen(config.botnetword)); return; } */ case 1: { if(strlen(arg[0])) { char hash[33]; ++net.hub.tmpint; //unsigned char *dupa = ((entMD5Hash *) &config.currentHub->getPass())->getHash(); MD5HexHash(hash, arg[0], AUTHSTR_LEN, ((entMD5Hash *) &config.hub.getPass())->getHash(), 16); net.hub.send(config.handle, " ", hash, NULL); net.hub.tmpstr = (char *) malloc(AUTHSTR_LEN + 1); MD5CreateAuthString(net.hub.tmpstr, AUTHSTR_LEN); net.hub.send(net.hub.tmpstr, NULL); return; } break; } case 2: { if(strlen(arg[3])) { if(MD5HexValidate(arg[3], net.hub.tmpstr, strlen(net.hub.tmpstr), ((entMD5Hash *) &config.hub.getPass())->getHash(), 16)) { char buf[MAX_LEN]; ++net.hub.tmpint; userlist.addHandle(arg[0], 0, B_FLAGS | HAS_H | HAS_L, arg[1], arg[2], 0); net.hub.handle = userlist.findHandle(arg[0]); DEBUG(printf("[D] hub handle: %s\n", net.hub.handle->name)); free(net.hub.tmpstr); net.hub.tmpstr = NULL; if(config.bottype != BOT_SLAVE) sprintf(buf, "%llu", userlist.SN); else strcpy(buf, "0"); net.hub.send(S_REGISTER, " ", S_VERSION, " ", buf, " ", (const char *) ME.nick, " ", net.irc.origin, NULL); return; } } break; } case 3: { if(!strcmp(arg[0], S_REGISTER)) { mem_strcpy(net.hub.name, arg[1]); net.hub.tmpint = 0; net.hub.status |= STATUS_CONNECTED | STATUS_REGISTERED | STATUS_BOT; net.hub.killTime = NOW + set.CONN_TIMEOUT; net.hub.lastPing = NOW; net.hub.enableCrypt(((entMD5Hash *) &config.hub.getPass())->getHash(), 16); net.sendBotListTo(&net.hub); net.propagate(&net.hub, S_BJOIN, " ", net.hub.name, NULL); config.currentHub->failures = 0; net.propagate(NULL, S_CHNICK, " ", (const char *) ME.nick, " ", net.irc.origin, NULL); 
return; } } default: break; } /* HUH */ net.hub.close("Access Denied"); } /* REGISTERED HUB */ net.hub.killTime = NOW + set.CONN_TIMEOUT; if(!strcmp(arg[0], S_UL_UPLOAD_START)) { if(userlist.ulbuf) { net.send(HAS_N, "[!] Double UL download, this should not happen", NULL); sleep(5); net.send(HAS_N, "[!] Terminating.", NULL); exit(1337); } userlist.ulbuf = new Pchar(64*1024); return; } if(!strcmp(arg[0], S_UL_UPLOAD_END)) { if(!userlist.ulbuf) { net.send(HAS_N, "[!] Update userlist is empty", NULL); net.send(HAS_N, "[-] Disconnecting", NULL); net.hub.close("Userlist is empty"); return; } userlist.update(); if(userlist.me()->flags[GLOBAL] & HAS_P) hostNotify = 1; else hostNotify = 0; userlist.sendToAll(); return; } if(userlist.ulbuf) { userlist.ulbuf->push(data); userlist.ulbuf->push("\n"); return; } if(!strcmp(arg[0], S_CYCLE) && strlen(arg[1])) { if(ME.findChannel(arg[1])) { net.irc.send("PART ", arg[1], " :", (const char *) config.cyclereason, NULL); ME.rejoin(arg[1], set.CYCLE_DELAY); if(strlen(arg[2])) net.send(HAS_N, "[*] Doing cycle on ", arg[1], NULL); } net.propagate(&net.hub, data, NULL); return; } if(!strcmp(arg[0], S_MKA) && strlen(arg[1])) { ch = ME.findChannel(arg[1]); if(ch) ch->massKick(MK_ALL, !strcmp(arg[3], "close") || !strcmp(arg[3], "lock")); return; } if(!strcmp(arg[0], S_MKO) && strlen(arg[1])) { ch = ME.findChannel(arg[1]); if(ch) ch->massKick(MK_OPS, !strcmp(arg[3], "close") || !strcmp(arg[3], "lock")); return; } if(!strcmp(arg[0], S_MKN) && strlen(arg[1])) { ch = ME.findChannel(arg[1]); if(ch) ch->massKick(MK_NONOPS, !strcmp(arg[3], "close") || !strcmp(arg[3], "lock")); return; } if(!strcmp(arg[0], S_UNLINK) && strlen(arg[1])) { HANDLE *h = userlist.findHandle(arg[1]); if(h && userlist.isBot(h)) { inetconn *bot = net.findConn(h); if(bot) bot->close("Forced unlink"); } return; } if(!strcmp(arg[0], S_NICK) && strlen(arg[1])) { net.irc.send("NICK ", arg[1], NULL); ME.nextNickCheck = NOW + set.KEEP_NICK_CHECK_DELAY; return; } if(!strcmp(arg[0], 
S_JUMP) && strlen(arg[2])) { ME.jump(arg[2], arg[3], arg[1]); return; } #ifdef HAVE_IPV6 if(!strcmp(arg[0], S_JUMP6) && strlen(arg[2])) { ME.jump(arg[2], arg[3], arg[1], AF_INET6); return; } #endif if(!strcmp(arg[0], S_JUMPS5) && strlen(arg[5])) { ME.jumps5(arg[2], atoi(arg[3]), arg[4], atoi(arg[5]), arg[1]); return; } if(!strcmp(arg[0], S_RDIE) && strlen(arg[1])) { net.send(HAS_N, "[!] ", DIE_REASON, NULL); net.irc.send("QUIT :", arg[1], " ", DIE_REASON2, NULL); safeExit(); } if(!strcmp(arg[0], S_NAMES) && strlen(arg[2])) { ch = ME.findChannel(arg[2]); if(ch) ch->names(arg[1]); else net.sendOwner(arg[1], "Invalid channel", NULL); return; } if(!strcmp(arg[0], S_CWHO) && strlen(arg[2])) { ch = ME.findChannel(arg[2]); if(ch) ch->cwho(arg[1], arg[3]); else net.sendOwner(arg[1], "Invalid channel", NULL); return; } if(!strcmp(arg[0], S_PSOTUPDATE)) { psotget.forkAndGo(arg[1]); return; } if(!strcmp(arg[0], S_STOPUPDATE)) { psotget.end(); return; } if(!strcmp(arg[0], S_RESTART)) { ME.restart(); return; } if(!strcmp(arg[0], S_ULSAVE)) { userlist.save(config.userlist_file); ME.nextRecheck = NOW + 5; net.propagate(&net.hub, data, NULL); return; } if(!strcmp(arg[0], S_RJOIN) && strlen(arg[2])) { userlist.rjoin(arg[1], arg[2]); net.propagate(&net.hub, data, NULL); ++userlist.SN; return; } if(!strcmp(arg[0], S_RPART) && strlen(arg[2])) { userlist.rpart(arg[1], arg[2], arg[3]); net.propagate(&net.hub, data, NULL); ++userlist.SN; return; } if(!strcmp(arg[0], S_STATUS) && strlen(arg[1])) { ME.sendStatus(arg[1]); return; } if(!strcmp(arg[0], S_CHKHOST) && strlen(arg[1])) { ME.checkMyHost(arg[1]); return; } if(parse_botnet(&net.hub, data)) return; if(userlist.parse(data)) { ++userlist.SN; //some things should not be propagated if(config.bottype == BOT_SLAVE) { if(!strcmp(S_ADDBOT, arg[0])) { net.propagate(&net.hub, S_ADDBOT, " ", arg[1], " ", arg[2], " ", arg[3], " ", S_SECRET, NULL); return; } if(!strcmp(S_PASSWD, arg[0]) && userlist.isBot(arg[1])) { net.propagate(&net.hub, 
S_PASSWD, " ", arg[1], " ", "00000000000000000000000000000000", NULL); return; } if(!strcmp(S_ADDR, arg[0]) && userlist.isBot(arg[1])) { net.propagate(&net.hub, S_ADDR, " ", arg[1], " ", "0.0.0.0", NULL); return; } if(!strcmp(S_ADDOFFENCE, arg[0])) // leaf dont need infos about offence-history return; } net.propagate(&net.hub, data, NULL); return; } }
int main(int argc, char *argv[]) { if (argc < 2) return 1; if (!strcmp(argv[1], "string_join")) { char *foo = string_join("bar", "baz", "quux", NULL); if (strcmp(foo, "barbazquux") != 0) { printf("%s != barbazquux\n", foo); return 1; } foo = string_join("hello", NULL); if (strcmp(foo, "hello") != 0) { printf("%s != hello\n", foo); return 1; } return 0; } else if (!strcmp(argv[1], "fread_line")) { FILE *fp = fopen(TESTDATADIR "/_fread_line.txt", "r"); char *line; size_t len; if (fp == NULL) { perror("Failed to open " TESTDATADIR "/_fread_line.txt"); return 1; } line = fread_line(fp, &len); printf("len = %d orig = %d\n", len, strlen("Hello world!\n")); if (strcmp(line, "Hello world!\n") != 0) { printf("'%s' != 'Hello world!\\n'\n", line); return 1; } ckd_free(line); line = fread_line(fp, &len); /* A line of exactly 127 characters. */ printf("len = %d orig = %d\n", len, strlen("123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456\n")); if (strcmp(line, "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456\n") != 0) { printf("'%s' != '123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456\\n'\n", line); return 1; } ckd_free(line); /* A very long line. */ line = fread_line(fp, &len); printf("len = %d orig = %d\n", len, strlen("All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. \n")); if (strcmp(line, "All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. 
All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. \n") != 0) { printf("'%s' != 'All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. All work and no play makes Jack a very dull boy. \\n'\n", line); return 1; } ckd_free(line); line = fread_line(fp, &len); if (line != NULL) { printf("%p != NULL\n", line); return 1; } } else if (!strcmp(argv[1], "string_trim")) { char *foo = ckd_salloc("\t foo bar baz \n"); string_trim(foo, STRING_BOTH); if (strcmp(foo, "foo bar baz") != 0) { printf("'%s' != 'foo bar baz'\n", foo); return 1; } string_trim(foo, STRING_BOTH); if (strcmp(foo, "foo bar baz") != 0) { printf("'%s' != 'foo bar baz'\n", foo); return 1; } strcpy(foo, "foo\nbar\n\n"); string_trim(foo, STRING_END); if (strcmp(foo, "foo\nbar") != 0) { printf("'%s' != 'foo\\nbar'\n", foo); return 1; } strcpy(foo, " \t \t foobar\n"); string_trim(foo, STRING_START); if (strcmp(foo, "foobar\n") != 0) { printf("'%s' != 'foobar\\n'\n", foo); return 1; } } else if (!strcmp(argv[1], "str2words")) { char *line = ckd_salloc(" foo bar baz argh"); char **words; int n; n = str2words(line, NULL, 0); if (n != 4) { printf("%d != 4\n", n); return 1; } words = ckd_calloc(n, sizeof(*words)); n = str2words(line, words, n); if (n != 4) { printf("%d != 4\n", n); return 1; } if (strcmp(words[0], "foo") != 0 || strcmp(words[1], "bar") != 0 || strcmp(words[2], "baz") != 0 || strcmp(words[3], "argh") != 0) { printf("%s, %s, %s, %s != foo, bar, baz, argh\n", words[0], words[1], words[2], words[3]); return 1; } return 0; } else if (!strcmp(argv[1], "nextword")) { char *line = ckd_salloc(" \tfoo bar\nbaz argh"); char *word; const char *delim = " \t\n"; char delimfound; int n; n = nextword(line, delim, &word, &delimfound); if (strcmp(word, "foo") != 0) { printf("%s != 
foo\n", word); return 1; } if (delimfound != ' ') { printf("didn't find ' '\n"); return 1; } word[n] = delimfound; line = word + n; n = nextword(line, delim, &word, &delimfound); if (strcmp(word, "bar") != 0) { printf("%s != bar\n", word); return 1; } if (delimfound != '\n') { printf("didn't find '\\n'\n"); return 1; } word[n] = delimfound; line = word + n; n = nextword(line, delim, &word, &delimfound); if (strcmp(word, "baz") != 0) { printf("%s != baz\n", word); return 1; } if (delimfound != ' ') { printf("didn't find ' '\n"); return 1; } word[n] = delimfound; line = word + n; n = nextword(line, delim, &word, &delimfound); if (strcmp(word, "argh") != 0) { printf("%s != argh\n", word); return 1; } if (delimfound != '\0') { printf("didn't find NUL\n"); return 1; } word[n] = delimfound; line = word + n; n = nextword(line, delim, &word, &delimfound); if (n != -1) { printf("didn't get -1 at end of string\n"); } line = ckd_salloc("FOO!"); n = nextword(line, delim, &word, &delimfound); if (strcmp(word, "FOO!") != 0) { printf("%s != FOO!\n", word); return 1; } if (delimfound != '\0') { printf("didn't find NUL\n"); return 1; } return 0; } return 0; }
/**
 * Walk the corpus and hand the feature frames of every phone segment to
 * segdmp_add_feat().
 *
 * For each utterance the word transcript is expanded to phones, checked
 * against the state segmentation, converted to triphones and cut into
 * per-phone (start, len) spans.  Utterances that fail one of these steps,
 * or that have fewer than 9 frames, are reported and skipped; every
 * per-utterance buffer is released whichever way the utterance ends.
 *
 * @param lex       Lexicon used to expand words to phones.
 * @param acmod_set Acoustic model set used for checking and conversion.
 * @param fcb       Feature computation block.
 * @param type      Not used here.
 * @return 0 once the corpus is exhausted.
 */
int
agg_phn_seg(lexicon_t *lex,
            acmod_set_t *acmod_set,
            feat_t *fcb,
            segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt;

    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;

    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    uint32 n_stream;
    uint32 *veclen;

    tick_cnt = 0;

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    while (corpus_next_utt()) {
        if ((++tick_cnt % 500) == 0) {
            E_INFOCONT("[%u] ", tick_cnt);
        }

        if (corpus_get_sent(&trans) != S3_SUCCESS) {
            E_FATAL("Unable to read word transcript for %s\n",
                    corpus_utt_brief_name());
        }

        if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
            E_FATAL("Unable to read Viterbi state segmentation for %s\n",
                    corpus_utt_brief_name());
        }

        n_word = str2words(trans, NULL, 0);
        word = ckd_calloc(n_word, sizeof(char*));
        str2words(trans, word, n_word);

        phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
        if (phone == NULL) {
            /* Nothing beyond the transcript buffers exists yet. */
            E_ERROR("Unable to produce phonetic transcription for %s\n",
                    corpus_utt_brief_name());
            free(trans);    /* alloc'ed using strdup, not ckd_*() */
            free(seg);      /* alloc'ed using malloc in areadshort(), not ckd_*() */
            ckd_free(word);
            continue;
        }

        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        /* check to see whether the word transcript and dictionary entries
           agree with the state segmentation */
        if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
            E_ERROR("ck_seg failed");
            goto next_utt;
        }

        if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
            E_ERROR("cvt2triphone failed");
            goto next_utt;
        }

        if (mk_seg(acmod_set, seg, n_frame, phone, start, len, n_phone) != S3_SUCCESS) {
            E_ERROR("mk_seg failed");
            goto next_utt;
        }

        if (corpus_provides_mfcc()) {
            if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
                E_FATAL("Can't read input features from %s\n", corpus_utt());
            }

            if (n_frame < 9) {
                E_WARN("utt %s too short\n", corpus_utt());
                if (mfcc) {
                    ckd_free(mfcc[0]);
                    ckd_free(mfcc);
                    mfcc = NULL;
                }
                goto next_utt;
            }

            feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
            feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

            for (s = 0; s < n_phone; s++) {
                segdmp_add_feat(phone[s], &feat[start[s]], len[s]);
            }

            feat_array_free(feat);
            free(&mfcc[0][0]);
            ckd_free(mfcc);
        }
        else {
            E_FATAL("No data type specified\n");
        }

    next_utt:
        /* Single release point for everything owned by this utterance. */
        ckd_free(btw_mark);
        free(trans);    /* alloc'ed using strdup, not ckd_*() */
        free(seg);      /* alloc'ed using malloc in areadshort(), not ckd_*() */
        ckd_free(word);
        ckd_free(phone);
        ckd_free(start);
        ckd_free(len);
    }

    return 0;
}
int Raw::calculatePenalty(const char *data) { char argv[10][MAX_LEN], *name, *p=NULL; int len, argc, mypenalty=0; const int maxpenalty=10; len=strlen(data); mypenalty=(1+len/100); argc=str2words(argv[0], data, 10, MAX_LEN, 0); if(!strcasecmp(argv[0], "MODE")) { /* argv[1] = target; channels and/or user * argv[2] = optional modes * argv[n] = optional parameters */ for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) { if(chan::isChannel(name)) calculatePenaltyOfChanmode(argv[2], argc-3, &mypenalty); else calculatePenaltyOfUsermode(argv[2], &mypenalty); } } else if(!strcasecmp(argv[0], "UMODE")) { /* argv[1] - username to change mode for * argv[2] - modes to change */ calculatePenaltyOfUsermode(argv[2], &mypenalty); } else if(!strcasecmp(argv[0], "KICK")) { /* argv[1] = channel * argv[2] = client to kick * argv[3] = kick comment */ int user_cnt=0; // count users to kick out for(name=strtok_r(argv[2], ",", &p); name; name=strtok_r(NULL, ",", &p)) user_cnt++; // if there are multiple channels, the users will be kicked out on each one for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) mypenalty+=3*user_cnt; // do not care if kick was successful, just to go the maximum /* alternative: * * mypenalty+=user_cnt; * TODO: we must increase the penalty (+2) if the kick was successful -> parse_irc() */ } else if(!strcasecmp(argv[0], "PRIVMSG") || !strcasecmp(argv[0], "NOTICE")) { // argv[1] = receiver list // argv[2] = text for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) mypenalty+=1; } else if(!strcasecmp(argv[0], "TOPIC")) { /* argv[1] = channel list * argv[2] = topic */ mypenalty+=1; if(*argv[2]) { // changing topic for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) mypenalty+=2; } } else if(!strcasecmp(argv[0], "AWAY")) { // argv[1] = away message if(!*argv[1]) // marking as not away mypenalty+=1; else // marking as away mypenalty+=2; } else if(!strcasecmp(argv[0], "MOTD")) { // argv[1] 
= servername if(*argv[1]) // remote MOTD mypenalty+=5; else mypenalty+=2; } else if(!strcasecmp(argv[0], "ADMIN")) { // argv[1] = servername if(*argv[1]) // remote ADMIN mypenalty+=3; else mypenalty+=2; } else if(!strcasecmp(argv[0], "INFO")) { // argv[1] = servername if(*argv[1]) // remote INFO mypenalty+=10; else mypenalty+=5; } else if(!strcasecmp(argv[0], "LINKS")) { /* argv[1] = servername mask * or: * argv[1] = server to query * argv[2] = servername mask */ if(*argv[1] && *argv[2]) // remote LINKS mypenalty+=5; else mypenalty+=2; } else if(!strcasecmp(argv[0], "NAMES")) { /* argv[1] = channel list * argv[2] = server to query */ if(*argv[2]) { // query another irc server for NAMES mypenalty+=maxpenalty; } else if(*argv[1]) { int chan_cnt=1; for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) chan_cnt++; chan_cnt=chan_cnt<2?2:(chan_cnt*ME.server.isupport.maxchannels)/10; mypenalty+=chan_cnt<2?2:chan_cnt; } else mypenalty+=maxpenalty; } else if(!strcasecmp(argv[0], "LUSERS")) { /* argv[1] = host/server mask * argv[2] = server to query */ if(*argv[1] && *argv[2]) // remote LUSERS mypenalty+=3; else mypenalty+=2; } else if(!strcasecmp(argv[0], "USERS")) { // argv[1] = servername if(*argv[1]) // remote USERS mypenalty+=3; else mypenalty+=2; } else if(!strcasecmp(argv[0], "WHO")) { // argv[1] = nickname mask or channel list // argv[2] = additional selection flag (like 'o') // FIXME: this can also be maxpenalty for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) mypenalty+=1; } else if(!strcasecmp(argv[0], "WHOIS")) { // argv[1] = nickname masklist mypenalty+=2; // XXX: we must increase penalty (+1) if we got a whois reply from another server } else if(!strcasecmp(argv[0], "WHOWAS")) { /* argv[1] = nickname * argv[2] = maximum replies * argv[3] = server to query */ for(name=strtok_r(argv[1], ",", &p); name; name=strtok_r(NULL, ",", &p)) { if(*argv[3]) mypenalty+=3; else mypenalty+=2; } } else if(!strcasecmp(argv[0], "LIST")) 
{ // argv[1] = channel list // argv[2] = server to query if(*argv[2]) // remote LIST mypenalty+=10; else mypenalty+=2; } else if(!strcasecmp(argv[0], "STATS")) mypenalty+=5; // maximum else if(!strcasecmp(argv[0], "SQUERY")) mypenalty+=2; else if(!strcasecmp(argv[0], "INVITE")) mypenalty+=3; // maximum else if(!strcasecmp(argv[0], "JOIN")) mypenalty+=2; else if(!strcasecmp(argv[0], "PART")) mypenalty+=4; else if(!strcasecmp(argv[0], "NICK")) mypenalty+=3; else if(!strcasecmp(argv[0], "TRACE")) mypenalty+=2; else if(!strcasecmp(argv[0], "VERSION")) mypenalty+=2; else if(!strcasecmp(argv[0], "SERVLIST")) mypenalty+=2; else if(!strcasecmp(argv[0], "MAP")) mypenalty+=2; else if(!strcasecmp(argv[0], "TIME")) mypenalty+=2; else if(!strcasecmp(argv[0], "HELP")) mypenalty+=2; else // everything else, e.g. PING, PONG, ISON mypenalty+=1; // FIXME: there is no #define DEBUG in config.h :-P DEBUG(printf("[D] Adding penalty %d\n", mypenalty)); return mypenalty; }
void SubOp::onPrivmsg(const char *from, const char *to, const char *msg) { // Check if we match any of our keywords if (match("!kick *",msg) || match("!kban *",msg) || match("!quick *",msg) || match("!topic *",msg)) { char arg[50][MAX_LEN]; // arguments char user[MAX_LEN] = ""; // the user to perform the action on char rest[MAX_LEN] = ""; // the rest of the line (minus the user) char whole[MAX_LEN] = ""; // the whole line char nick[15] = ""; // local nickname char *pch; // position holder chan *ch = ME.findChannel(to); // channel we are acting in // check if we have a channel if(ch) { //do i have op ? if(ch->me->flags & IS_OP) { // get the user who is performing the action chanuser *u = ch->getUser(from); // check if the user is valid, has the e flag and is voiced or oped in the channel currently if((u) && (u->flags & HAS_E) && (u->flags & IS_VOICE || u->flags & IS_OP)) { // break up the line str2words(arg[0], msg, 50, MAX_LEN, 0); // get the user strcpy(user,arg[1]); // loop through the line to concat it into one string again (minus the user) for (int i=2;i < 50;i++) { if (strlen(arg[i]) > 0) { strcat(rest,strcat(arg[i]," ")); } } // get the 'whole' line (the user to act on, a space and the rest of the line) strcat(whole,user); strcat(whole," "); strcat(whole,rest); // get just the nick from the nick!ident@host string pch=strchr(from,'!'); strncat(nick,from,pch-from); // check if we are setting the topic if(match("!topic *",msg)) { // stick 'nick:' on the front of the topic string strcat(nick,":"); strcat(nick,whole); // set the topic net.irc.send("TOPIC ", (const char *) ch->name, " :", nick, NULL); } else { // we arent setting the topic, so we are acting on another user, get that user chanuser *o = ch->getUser(user); // check if the person is trying to kick either myself or a permanent owner if (o && ((o == ch->me) || (o->flags & HAS_X)) && !(u->flags & HAS_X)) { // kick the user for being naughty ch->kick(u,"Don't try it, f****r."); // check if we are trying to 
kick someone we shouldnt.. } else if (o && (o != u) && (!(o->flags & (HAS_E | HAS_O | HAS_H | HAS_S | HAS_L)) || ((u->flags & HAS_X) && !(o->flags & HAS_X)))) { // check if we are trying to kickban if(match("!kban *",msg)) { // create the kick message strcat(nick,":"); strcat(nick,rest); // kickban the user for 1200 seconds (20 mins) with the created reason ch->knockout(o,nick,1200); // check if we are kicking } else if(match("!kick *",msg)) { // kick the user with the reason ch->kick(o,rest); // check if we are quickbanning a user } else if(match("!quick *",msg)) { // create the kick message strcat(nick,":"); strcat(nick,"Quickban."); // kickban the user for 10 seconds ch->knockout(o,nick,10); } // end of checks for kick/ban type } // end of check for kicking an invalid user } // end of check for setting topic or kick/banning } // end of check for a valid user sending the command } // end of check for is I have ops } // end of check for if we found a valid channel } // end of check for if we matched our text } // end of function
/**
 * Advance to the next line of an ARPA file and parse it as one N-gram.
 *
 * The line is expected to hold a log10 probability, `order` words and, for
 * orders below `order_max`, an optional log10 backoff weight.  On success
 * raw_ngram->weights (1 entry for the highest order, otherwise 2) and
 * raw_ngram->words (`order` entries, stored in reverse of file order) are
 * newly allocated and filled.  At end of input or on a malformed line a
 * message is logged and raw_ngram is left untouched.
 *
 * @param li         in/out line iterator; replaced by lineiter_next(*li)
 * @param wid        hash table used to map word strings to word ids
 * @param lmath      log-math object used to convert from log10
 * @param order      order of the N-gram being read
 * @param order_max  highest order of the model (it has no backoff column)
 * @param raw_ngram  output record
 */
static void
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    int n;
    int i;
    char *wptr[NGRAM_MAX_ORDER + 1];

    *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return;
    }
    string_trim((*li)->buf, STRING_BOTH);

    /* Probability plus `order` words is the minimum for a valid entry. */
    n = str2words((*li)->buf, wptr, NGRAM_MAX_ORDER + 1);
    if (n < order + 1) {
        /* Blank lines are skipped quietly. */
        if ((*li)->buf[0] != '\0') {
            E_WARN("Format error; %d-gram ignored: %s\n", order,
                   (*li)->buf);
        }
        return;
    }

    if (order == order_max) {
        raw_ngram->weights =
            (float *) ckd_calloc(1, sizeof(*raw_ngram->weights));
        raw_ngram->weights[0] = atof_c(wptr[0]);
        if (raw_ngram->weights[0] > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, wptr[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        raw_ngram->weights[0] =
            logmath_log10_to_log_float(lmath, raw_ngram->weights[0]);
    }
    else {
        float weight, backoff;

        raw_ngram->weights =
            (float *) ckd_calloc(2, sizeof(*raw_ngram->weights));
        weight = atof_c(wptr[0]);
        if (weight > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, wptr[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        else {
            raw_ngram->weights[0] =
                logmath_log10_to_log_float(lmath, weight);
        }

        if (n == order + 1) {
            /* No backoff column on this line. */
            raw_ngram->weights[1] = 0.0f;
        }
        else {
            backoff = atof_c(wptr[order + 1]);
            raw_ngram->weights[1] =
                logmath_log10_to_log_float(lmath, backoff);
        }
    }

    /*
     * Store the word ids last-to-first.  An index is used rather than a
     * descending pointer so that no pointer before the start of the array
     * is ever formed.
     */
    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));
    for (i = 0; i < order; i++) {
        uint32 *word_out = &raw_ngram->words[order - 1 - i];

        if (hash_table_lookup_int32(wid, wptr[i + 1],
                                    (int32 *) word_out) != 0) {
            E_WARN("Unknown word '%s' in %d-gram\n", wptr[i + 1], order);
        }
    }
}
/**
 * Read LM class definitions from a file and enter them into a hash table.
 *
 * A class starts with a line "LMCLASS <name>" and ends with "END <name>".
 * Each line in between names one word, optionally followed by a weight
 * (1.0 when absent).  Outside a class every other line is ignored; lines
 * for which str2words() reports no words are skipped everywhere.
 *
 * @param classes    hash table receiving classname -> classdef_t entries
 * @param file_name  file to read (opened through fopen_comp())
 * @return 0 on success, -1 if the file cannot be opened, an END marker does
 *         not match the open class, or the class cannot be entered into the
 *         hash table.
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass = FALSE;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;
    char line[512];

    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    while (fgets(line, sizeof(line), fp) != NULL) {
        char *wptr[2];
        int n_words;

        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (!inclass) {
            /* Start a new LM class if the LMCLASS marker is seen;
             * anything else outside a class is ignored. */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            continue;
        }

        if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
            /* End-of-class marker: it must name the class being read. */
            classdef_t *classdef;
            gnode_t *word, *weight;
            int32 i;

            if (classname == NULL || 0 != strcmp(wptr[1], classname))
                goto error_out;
            inclass = FALSE;

            /* Construct a class from the words collected (the lists were
             * built back to front, hence the reversal). */
            classdef = ckd_calloc(1, sizeof(*classdef));
            classwords = glist_reverse(classwords);
            classprobs = glist_reverse(classprobs);
            classdef->n_words = glist_count(classwords);
            classdef->words = ckd_calloc(classdef->n_words,
                                         sizeof(*classdef->words));
            classdef->weights = ckd_calloc(classdef->n_words,
                                           sizeof(*classdef->weights));
            word = classwords;
            weight = classprobs;
            for (i = 0; i < classdef->n_words; ++i) {
                classdef->words[i] = gnode_ptr(word);
                classdef->weights[i] = gnode_float32(weight);
                word = gnode_next(word);
                weight = gnode_next(weight);
            }

            /* Add this class to the hash table. */
            if (hash_table_enter(classes, classname, classdef) != classdef) {
                /*
                 * The word strings are now referenced by classdef->words.
                 * NOTE(review): this assumes classdef_free() releases those
                 * strings - confirm.  Drop the list nodes here so the
                 * generic cleanup below does not free the strings again.
                 */
                classdef_free(classdef);
                glist_free(classwords);
                classwords = NULL;
                goto error_out;
            }

            /* The table now holds classname and classdef; reset state. */
            glist_free(classwords);
            glist_free(classprobs);
            classwords = NULL;
            classprobs = NULL;
            classname = NULL;
        }
        else {
            /* A class member: "word" or "word weight". */
            float32 fprob;

            if (n_words == 2)
                fprob = atof_c(wptr[1]);
            else
                fprob = 1.0f;
            classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0]));
            classprobs = glist_add_float32(classprobs, fprob);
        }
    }
    rv = 0;                     /* Success. */

  error_out:
    /* Free whatever is still owned here (also runs on success). */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
/*
 * Read pronunciation dictionary entries from fp into d.
 *
 * Every line that does not start with '#' is split into words.  wptr[0] is
 * the word string.  If wptr[1] starts with something strtod() can parse, it
 * is taken as a per-word value (proba) and the phones start at wptr[2];
 * otherwise the phones start at wptr[1].  Each phone is mapped with
 * dict_ciphone_id() and the word is added with dict_add_word().
 *
 * After the read loop, when d->lts_rules is set, every non-filler CI phone
 * of d->mdef must appear in cmu6_lts_phone_table, else E_FATAL is invoked.
 *
 * NOTE(review): as visible here the definition stops after the lts_rules
 * check - no return statement or closing brace of the function follows.
 */
static int32 dict_read(FILE * fp, dict_t * d)
{
    char line[16384], **wptr;
    s3cipid_t p[4096];
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;
    s3cipid_t ci;
    int32 ph;

    maxwd = 4092;
    wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */
    /* NOTE(review): no matching free of wptr is visible in this block. */

    lineno = 0;
    while (fgets(line, sizeof(line), fp) != NULL) {
        lineno++;
        if (line[0] == '#') /* Comment line */
            continue;

        if ((nwd = str2words(line, wptr, maxwd)) < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line, maxwd);

        if (nwd == 0) /* Empty line */
            continue;
        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n", lineno, wptr[0]);
            continue;
        }

        {char * fin;
            float proba=0.0;
            int deca=0; /* 1 when wptr[1] was consumed as a number, else 0 */

            /* Try to read wptr[1] as a number; fin stays at wptr[1] if
             * nothing could be converted. */
            proba=strtod(wptr[1],&fin);
            if (fin !=wptr[1])
                deca=1;
            else
                proba=0.0;

            /* Convert pronunciation string to CI-phone-ids */
            for (i = 1; i < nwd-deca; i++) {
                p[i - 1] = dict_ciphone_id(d, wptr[i+deca]);
                if (NOT_S3CIPID(p[i - 1])) {
                    /* NOTE(review): the phone looked up was wptr[i+deca],
                     * but the message prints wptr[i]. */
                    E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n", lineno, wptr[i], wptr[0]);
                    break;
                }
            }

            if (i == nwd-deca) { /* All CI-phones successfully converted to IDs */
                w = dict_add_word(d, wptr[0], p, nwd - 1-deca);
                if (NOT_S3WID(w))
                    E_ERROR ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n", lineno, wptr[0]);
                /* NOTE(review): this store is not guarded by the check
                 * above, so it also runs when w is not a valid word id. */
                d->word[w].proba=proba;
            }
        }
    }

    if (d->lts_rules) {
#if 1 /* Until we allow user to put in a mapping of the phoneset from LTS to the phoneset from mdef, The checking will intrusively stop the recognizer. */
        for (ci = 0; ci < mdef_n_ciphone(d->mdef); ci++) {
            if (!mdef_is_fillerphone(d->mdef, ci)) {
                /* Search the LTS phone table for this CI phone's name. */
                for (ph = 0; cmu6_lts_phone_table[ph] != NULL; ph++) {
                    /* E_INFO("%s %s\n",cmu6_lts_phone_table[ph],mdef_ciphone_str(d->mdef,ci)); */
                    if (!strcmp (cmu6_lts_phone_table[ph], mdef_ciphone_str(d->mdef, ci)))
                        break;
                }
                /* Reaching the NULL terminator means no entry matched. */
                if (cmu6_lts_phone_table[ph] == NULL) {
                    /* NOTE(review): the message is passed as four
                     * comma-separated string arguments rather than one
                     * concatenated literal - confirm this is intended. */
                    E_FATAL ("A phone in the model definition doesn't appear in the letter to sound ", "rules. \n This is case we don't recommend user to ", "use the built-in LTS. \n Please kindly turn off ", "-lts_mismatch\n");
                }
            }
        }
#endif
    }
/**
 * Run batch decoding over the control file of a batch decoder.
 *
 * Starts the two search passes, then walks bd->ctlfh line by line.  Lines
 * before -ctloffset and lines that are not a multiple of -ctlincr away from
 * it are skipped; processing stops once -ctlcount lines past the offset
 * have been reached (-1 means no limit).  A control line has the form
 * "file [start_frame [end_frame [uttid]]]".  When an alignment file is
 * open it is advanced in lock-step with the control file and the current
 * line is parsed and handed to the decoder.
 *
 * @param bd  batch decoder
 * @return always 0
 */
int
batch_decoder_run(batch_decoder_t *bd)
{
    int32 ctloffset, ctlcount, ctlincr;
    lineiter_t *li, *ali = NULL;

    search_run(bd->fwdtree);
    search_run(bd->fwdflat);

    ctloffset = cmd_ln_int32_r(bd->config, "-ctloffset");
    ctlcount = cmd_ln_int32_r(bd->config, "-ctlcount");
    ctlincr = cmd_ln_int32_r(bd->config, "-ctlincr");

    if (bd->alignfh)
        ali = lineiter_start(bd->alignfh);

    for (li = lineiter_start(bd->ctlfh); li; li = lineiter_next(li)) {
        alignment_t *al = NULL;
        char *wptr[4];
        int32 nf, sf, ef;

        /* Skip lines before the offset and lines not selected by the
         * increment, keeping the alignment file in step. */
        if (li->lineno < ctloffset
            || (li->lineno - ctloffset) % ctlincr != 0) {
            if (ali)
                ali = lineiter_next(ali);
            continue;
        }

        if (ctlcount != -1 && li->lineno >= ctloffset + ctlcount) {
            /* Leaving before end of file: the iterator is still live and
             * has to be released here. */
            lineiter_free(li);
            break;
        }

        if (ali)
            al = parse_alignment(ali->buf, search_factory_d2p(bd->sf));

        sf = 0;
        ef = -1;
        nf = str2words(li->buf, wptr, 4);

        if (nf == 0) {
            /* Do nothing. */
        }
        else if (nf < 0) {
            E_ERROR("Unexpected extra data in control file at line %d\n",
                    li->lineno);
        }
        else {
            char *file, *uttid;

            file = wptr[0];
            uttid = NULL;
            if (nf > 1)
                sf = atoi(wptr[1]);
            if (nf > 2)
                ef = atoi(wptr[2]);
            if (nf > 3)
                uttid = wptr[3];
            /* Do actual decoding. */
            batch_decoder_decode(bd, file, uttid, sf, ef, al);
        }

        alignment_free(al);
        if (ali)
            ali = lineiter_next(ali);
    }

    /* Still non-NULL when the alignment file was not read to its end. */
    if (ali)
        lineiter_free(ali);

    featbuf_producer_shutdown(search_factory_featbuf(bd->sf));
    return 0;
}
/**
 * Add a word with the given pronunciation to the decoder's dictionary.
 *
 * The phone string is split on whitespace and every phone is mapped to a
 * CI phone id.  The word is then added to the dictionary and to dict2pid,
 * and to the language model set of every search named PS_SEARCH_NGRAM.
 * When `update` is non-zero each search is reinitialised as well.
 *
 * @param ps      decoder
 * @param word    word string to add
 * @param phones  whitespace-separated phone names
 * @param update  non-zero to reinitialise the searches
 * @return the dictionary word id on success; -1 if a phone is unknown, the
 *         dictionary rejects the word or an N-gram model rejects it; the
 *         negative result of ps_search_reinit() if reinitialisation fails.
 */
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    s3cipid_t *pron;
    hash_iter_t *search_it;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
        ckd_free(pron);
        return -1;
    }
    /* No longer needed. */
    ckd_free(pron);

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    /* TODO: we definitely need to refactor this */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        ps_search_t *search = hash_entry_val(search_it->ent);

        if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) {
            ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;

            if (ngram_model_add_word(lmset, word, 1.0)
                == NGRAM_INVALID_WID) {
                hash_table_iter_free(search_it);
                return -1;
            }
        }

        /* Rebuild the widmap and search tree if requested. */
        if (update) {
            /* The assignment is parenthesised so that rv receives the
             * status itself, not the result of the comparison. */
            if ((rv = ps_search_reinit(search, ps->dict, ps->d2p)) < 0) {
                hash_table_iter_free(search_it);
                return rv;
            }
        }
    }

    return wid;
}
/**
 * Decode every utterance listed in a control file and write the results.
 *
 * Optional companion control files (-mllrctl, -lmnamectl, -fsgctl) are read
 * one line per control-file line.  Output files (-hyp, -hypseg, -ctm) are
 * opened unbuffered, and lattices / N-best lists go to -outlatdir and
 * -nbestdir when set.  A control line has the form
 * "file [start_frame [end_frame [uttid]]]".  Lines before -ctloffset and
 * from -ctloffset + -ctlcount onwards are read but not decoded.
 *
 * Every file opened here is closed and every line buffer is released on all
 * exit paths, including a failed utterance and a size mismatch between the
 * control file and a companion file.
 *
 * @param ps      decoder
 * @param config  command-line configuration
 * @param ctlfh   open control file (not closed by this function)
 */
static void
process_ctl(ps_decoder_t *ps, cmd_ln_t *config, FILE *ctlfh)
{
    int32 ctloffset, ctlcount, ctlincr;
    int32 i;
    char *line;
    size_t len;
    FILE *hypfh = NULL, *hypsegfh = NULL, *ctmfh = NULL;
    FILE *mllrfh = NULL, *lmfh = NULL, *fsgfh = NULL;
    double n_speech, n_cpu, n_wall;
    char const *outlatdir;
    char const *nbestdir;
    char const *str;
    int frate;

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    ctlcount = cmd_ln_int32_r(config, "-ctlcount");
    ctlincr = cmd_ln_int32_r(config, "-ctlincr");
    outlatdir = cmd_ln_str_r(config, "-outlatdir");
    nbestdir = cmd_ln_str_r(config, "-nbestdir");
    frate = cmd_ln_int32_r(config, "-frate");

    if ((str = cmd_ln_str_r(config, "-mllrctl"))) {
        mllrfh = fopen(str, "r");
        if (mllrfh == NULL) {
            E_ERROR_SYSTEM("Failed to open MLLR control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-fsgctl"))) {
        fsgfh = fopen(str, "r");
        if (fsgfh == NULL) {
            E_ERROR_SYSTEM("Failed to open FSG control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-lmnamectl"))) {
        lmfh = fopen(str, "r");
        if (lmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open LM name control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-hyp"))) {
        hypfh = fopen(str, "w");
        if (hypfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-hypseg"))) {
        hypsegfh = fopen(str, "w");
        if (hypsegfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypsegfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-ctm"))) {
        ctmfh = fopen(str, "w");
        if (ctmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(ctmfh, NULL);
    }

    i = 0;
    while ((line = fread_line(ctlfh, &len))) {
        char *wptr[4];
        int32 nf, sf, ef;
        char *mllrline = NULL, *lmline = NULL, *fsgline = NULL;
        char *fsgfile = NULL, *lmname = NULL, *mllrfile = NULL;

        /* Read the matching line of each companion file.  On a mismatch
         * release everything read so far for this line before bailing. */
        if (mllrfh) {
            mllrline = fread_line(mllrfh, &len);
            if (mllrline == NULL) {
                E_ERROR("File size mismatch between control and MLLR control\n");
                ckd_free(line);
                goto done;
            }
            mllrfile = string_trim(mllrline, STRING_BOTH);
        }
        if (lmfh) {
            lmline = fread_line(lmfh, &len);
            if (lmline == NULL) {
                E_ERROR("File size mismatch between control and LM control\n");
                ckd_free(mllrline);
                ckd_free(line);
                goto done;
            }
            lmname = string_trim(lmline, STRING_BOTH);
        }
        if (fsgfh) {
            fsgline = fread_line(fsgfh, &len);
            if (fsgline == NULL) {
                E_ERROR("File size mismatch between control and FSG control\n");
                ckd_free(lmline);
                ckd_free(mllrline);
                ckd_free(line);
                goto done;
            }
            fsgfile = string_trim(fsgline, STRING_BOTH);
        }

        if (i < ctloffset) {
            i += ctlincr;
            goto nextline;
        }
        if (ctlcount != -1 && i >= ctloffset + ctlcount) {
            goto nextline;
        }

        sf = 0;
        ef = -1;
        nf = str2words(line, wptr, 4);
        if (nf == 0) {
            /* Do nothing. */
        }
        else if (nf < 0) {
            E_ERROR("Unexpected extra data in control file at line %d\n", i);
        }
        else {
            char const *hyp, *file, *uttid;
            int32 score;

            file = wptr[0];
            uttid = NULL;
            if (nf > 1)
                sf = atoi(wptr[1]);
            if (nf > 2)
                ef = atoi(wptr[2]);
            if (nf > 3)
                uttid = wptr[3];

            E_INFO("Decoding '%s'\n", uttid ? uttid : file);

            /*
             * Do actual decoding.  A failing step abandons this utterance;
             * jumping to nextline (instead of `continue`) releases the line
             * buffers and, as before, leaves i unchanged.
             */
            if (process_mllrctl_line(ps, config, mllrfile) < 0)
                goto nextline;
            if (process_lmnamectl_line(ps, config, lmname) < 0)
                goto nextline;
            if (process_fsgctl_line(ps, config, fsgfile) < 0)
                goto nextline;
            if (process_ctl_line(ps, config, file, uttid, sf, ef) < 0)
                goto nextline;
            hyp = ps_get_hyp(ps, &score, &uttid);

            /* Write out results and such. */
            if (hypfh) {
                fprintf(hypfh, "%s (%s %d)\n", hyp ? hyp : "", uttid, score);
            }
            if (hypsegfh) {
                write_hypseg(hypsegfh, ps, uttid);
            }
            if (ctmfh) {
                ps_seg_t *itor = ps_seg_iter(ps, &score);
                write_ctm(ctmfh, ps, itor, uttid, frate);
            }
            if (outlatdir) {
                write_lattice(ps, outlatdir, uttid);
            }
            if (nbestdir) {
                write_nbest(ps, nbestdir, uttid);
            }
            ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
            E_INFO("%s: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
                   uttid, n_speech, n_cpu, n_wall);
            E_INFO("%s: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
                   uttid, n_cpu / n_speech, n_wall / n_speech);
            E_INFO_NOFN("%s (%s %d)\n", hyp ? hyp : "", uttid, score);
        }
        i += ctlincr;

    nextline:
        ckd_free(mllrline);
        ckd_free(fsgline);
        ckd_free(lmline);
        ckd_free(line);
    }

    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    E_INFO("TOTAL %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    E_INFO("AVERAGE %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

done:
    if (hypfh)
        fclose(hypfh);
    if (hypsegfh)
        fclose(hypsegfh);
    if (ctmfh)
        fclose(ctmfh);
    /* The companion control files were opened here as well. */
    if (mllrfh)
        fclose(mllrfh);
    if (lmfh)
        fclose(lmfh);
    if (fsgfh)
        fclose(fsgfh);
}
/**
 * Advance to the next line of an ARPA file and parse it as one N-gram.
 *
 * The line must hold a log10 probability followed by `order` words; for
 * orders below `order_max` an optional log10 backoff weight may follow.
 * On success raw_ngram->order, ->prob, ->words (newly allocated, stored in
 * reverse of file order) and, for orders below `order_max`, ->backoff are
 * filled in.
 *
 * @param li         in/out line iterator; advanced by one line if non-NULL
 * @param wid        hash table used to map word strings to word ids
 * @param lmath      log-math object used to convert from log10
 * @param order      order of the N-gram being read
 * @param order_max  highest order of the model (it has no backoff column)
 * @param raw_ngram  output record
 * @return 0 on success, -1 at end of input or on a malformed line.
 */
static int
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    int n;
    int i;
    char *wptr[NGRAM_MAX_ORDER + 1];

    if (*li)
        *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return -1;
    }

    /* Probability plus `order` words is the minimum for a valid entry. */
    n = str2words((*li)->buf, wptr, NGRAM_MAX_ORDER + 1);
    if (n < order + 1) {
        E_ERROR("Format error; %d-gram ignored: %s\n", order, (*li)->buf);
        return -1;
    }

    raw_ngram->order = order;

    if (order == order_max) {
        raw_ngram->prob = atof_c(wptr[0]);
        if (raw_ngram->prob > 0) {
            E_WARN("%d-gram '%s' has positive probability\n", order,
                   wptr[1]);
            raw_ngram->prob = 0.0f;
        }
        raw_ngram->prob =
            logmath_log10_to_log_float(lmath, raw_ngram->prob);
    }
    else {
        float weight, backoff;

        weight = atof_c(wptr[0]);
        if (weight > 0) {
            E_WARN("%d-gram '%s' has positive probability\n", order,
                   wptr[1]);
            raw_ngram->prob = 0.0f;
        }
        else {
            raw_ngram->prob = logmath_log10_to_log_float(lmath, weight);
        }

        if (n == order + 1) {
            /* No backoff column on this line. */
            raw_ngram->backoff = 0.0f;
        }
        else {
            backoff = atof_c(wptr[order + 1]);
            raw_ngram->backoff =
                logmath_log10_to_log_float(lmath, backoff);
        }
    }

    /*
     * Store the word ids last-to-first.  An index is used rather than a
     * descending pointer so that no pointer before the start of the array
     * is ever formed.
     */
    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));
    for (i = 0; i < order; i++) {
        uint32 *word_out = &raw_ngram->words[order - 1 - i];

        if (hash_table_lookup_int32(wid, wptr[i + 1],
                                    (int32 *) word_out) != 0) {
            E_WARN("Unknown word '%s' in %d-gram\n", wptr[i + 1], order);
        }
    }

    return 0;
}
/**
 * Read pronunciation dictionary entries from fp into d.
 *
 * Lines starting with "##" or ";;" are comments.  On every other line the
 * first word is the word string and the remaining words are its phones.
 * Lines without a pronunciation, with an unknown phone, or rejected by
 * dict_add_word() are reported and skipped.  The word and phone buffers
 * grow on demand when a line has more words than currently fit.
 *
 * @param fp  open dictionary file
 * @param d   dictionary to add the words to
 * @return always 0
 */
static int32
dict_read(FILE * fp, dict_t * d)
{
    lineiter_t *li;
    char **wptr;
    s3cipid_t *p;
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;
    size_t stralloc, phnalloc;

    maxwd = 512;
    p = (s3cipid_t *) ckd_calloc(maxwd + 4, sizeof(*p));
    wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */

    lineno = 0;
    stralloc = phnalloc = 0;
    for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
        lineno++;
        if (0 == strncmp(li->buf, "##", 2)
            || 0 == strncmp(li->buf, ";;", 2))
            continue;

        if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
            /* Increase size of p, wptr. */
            nwd = str2words(li->buf, NULL, 0);
            assert(nwd > maxwd); /* why else would it fail? */
            maxwd = nwd;
            p = (s3cipid_t *) ckd_realloc(p, (maxwd + 4) * sizeof(*p));
            wptr = (char **) ckd_realloc(wptr, maxwd * sizeof(*wptr));
            /* The counting pass above does not fill wptr, so split the
             * line again now that every word fits. */
            nwd = str2words(li->buf, wptr, maxwd);
        }

        if (nwd == 0)           /* Empty line */
            continue;
        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n",
                    lineno, wptr[0]);
            continue;
        }

        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd; i++) {
            p[i - 1] = dict_ciphone_id(d, wptr[i]);
            if (NOT_S3CIPID(p[i - 1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i], wptr[0]);
                break;
            }
        }

        if (i == nwd) {         /* All CI-phones successfully converted to IDs */
            w = dict_add_word(d, wptr[0], p, nwd - 1);
            if (NOT_S3WID(w))
                E_ERROR
                    ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                     lineno, wptr[0]);
            else {
                /* Only used for the summary printed below. */
                stralloc += strlen(d->word[w].word);
                phnalloc += d->word[w].pronlen * sizeof(s3cipid_t);
            }
        }
    }
    E_INFO("Allocated %d KiB for strings, %d KiB for phones\n",
           (int)stralloc / 1024, (int)phnalloc / 1024);
    ckd_free(p);
    ckd_free(wptr);

    return 0;
}
/**
 * Read the unigram section of an ARPA file.
 *
 * Lines are skipped until one equal to "\1-grams:" is found; the next
 * `count` lines must each hold a log10 probability, a word and optionally a
 * log10 backoff weight.  Probabilities and backoffs are converted with
 * base->lmath, the word strings are copied into base->word_str, and finally
 * every word is entered into base->wid with its index as the value.
 *
 * @param li        in/out line iterator
 * @param count     number of unigrams to read
 * @param base      model receiving word strings and the word-id table
 * @param unigrams  array of at least `count` entries to fill
 * @return 0 on success, -1 if the section marker is missing, the input ends
 *         early or a line has the wrong number of fields.
 */
static int
read_1grams_arpa(lineiter_t ** li, uint32 count, ngram_model_t * base,
                 unigram_t * unigrams)
{
    char *fields[3];
    int min_fields;
    int n_fields;
    uint32 idx;

    /* Look for the section header. */
    for (; *li != NULL; *li = lineiter_next(*li)) {
        if (strcmp((*li)->buf, "\\1-grams:") == 0)
            break;
    }
    if (*li == NULL) {
        E_ERROR_SYSTEM("Failed to read \\1-grams: mark");
        return -1;
    }

    /* Probability and word are mandatory, the backoff is optional. */
    min_fields = 2;
    idx = 0;
    while (idx < count) {
        unigram_t *entry;

        *li = lineiter_next(*li);
        if (*li == NULL) {
            E_ERROR
                ("Unexpected end of ARPA file. Failed to read %dth unigram\n",
                 idx + 1);
            return -1;
        }

        n_fields = str2words((*li)->buf, fields, 3);
        if (n_fields < min_fields) {
            E_ERROR("Format error at line %s, Failed to read unigrams\n",
                    (*li)->buf);
            return -1;
        }

        entry = unigrams + idx;
        entry->prob =
            logmath_log10_to_log_float(base->lmath, atof_c(fields[0]));
        if (entry->prob > 0) {
            E_WARN("Unigram '%s' has positive probability\n", fields[1]);
            entry->prob = 0;
        }

        if (n_fields != min_fields + 1)
            entry->bo = 0.0f;
        else
            entry->bo =
                logmath_log10_to_log_float(base->lmath, atof_c(fields[2]));

        /* TODO: classify float with fpclassify and warn if bad value occurred */
        base->word_str[idx] = ckd_salloc(fields[1]);
        idx++;
    }

    /* Map every unigram string to its index in the word-id table. */
    for (idx = 0; idx < count; idx++) {
        void *id_val = (void *) (long) idx;

        if (hash_table_enter(base->wid, base->word_str[idx], id_val)
            != id_val) {
            E_WARN("Duplicate word in dictionary: %s\n",
                   base->word_str[idx]);
        }
    }

    return 0;
}