/*
 * Overwrite the probabilities stored in lm with the log10 of a weighted
 * sum of the probabilities given by two other models.
 *
 * For every order k = 1 .. lm->n, each k-gram enumerated from lm is turned
 * into words using lm->vocab, scored with calc_prob() against lm1 (with fb1)
 * and lm2 (with fb2), and log10(w1*p1 + w2*p2) is written to the slot in
 * lm->probs that the browse cursor currently points at.
 *
 * A former correction that replaced a stored value of exactly 0.0 with
 * -0.00001 is intentionally disabled and is not applied here.
 */
void calc_interpolated_prob(arpa_lm_t *lm, arpa_lm_t *lm1, arpa_lm_t *lm2,fb_info* fb1,fb_info* fb2,double w1,double w2)
{
    TBROWSE browse;
    id__t ngram_ids[MAX_K];
    char **ngram_words;
    int order;

    ngram_words = (char **)NewArray(MAX_K, MAX_WORD, sizeof(char));

    for (order = 1; order <= lm->n; order++) {
        int seen = 0;   /* n-grams visited at this order; drives the progress dots */

        begin_browse(lm, order, &browse);
        printf("\nProcessing %d-gram\n", order);

        while (get_next_ngram(ngram_ids, &browse)) {
            double prob_first;
            double prob_second;
            double mixed;

            seen++;
            show_dot(seen);

            ids2words(ngram_words, ngram_ids, order, lm->vocab);
            prob_first = calc_prob(ngram_words, order, lm1, fb1);
            prob_second = calc_prob(ngram_words, order, lm2, fb2);
            mixed = w1 * prob_first + w2 * prob_second;

            lm->probs[order - 1][browse.pos[order - 1] - 1] = log10(mixed);
        }
    }

    DeleteArray(ngram_words);
}
//get soft assignment prob from data and GMM model; process_mode1(data, GMM,prob); void process_mode1(const mxArray *mxfea, const mxArray *mxGMM, mxArray *output[]) { double *fea = (double *)mxGetPr(mxfea); const int *dims = mxGetDimensions(mxfea); nDim = dims[0]; nSample = dims[1]; double *gmmMu = (double *)mxGetPr(mxGetField(mxGMM, 0, "Mu")); double *gmmSigma = (double *)mxGetPr(mxGetField(mxGMM, 0, "Sigma")); // //mexPrintf("\n %f %f %f.", gmmSigma[0],gmmSigma[1],gmmSigma[2]); double *gmmPriors = (double *)mxGetPr(mxGetField(mxGMM, 0, "Priors")); const int *dims2 = mxGetDimensions(mxGetField(mxGMM, 0, "Mu")); nBase = dims2[1]; int nDim2 = dims2[0]; if (nDim!=nDim2) mexErrMsgTxt("the dim of feature and gmm model is different!"); int out[2]; out[0] = nSample; out[1] = nBase; mxArray *mxProb = mxCreateNumericArray(2, out, mxDOUBLE_CLASS, mxREAL); double *prob = (double *)mxGetPr(mxProb); ////mexPrintf("%d.%d.%d.\n", nBase, nSample,nDim); calc_prob(fea, gmmMu, gmmSigma, gmmPriors, prob); output[0] = mxProb; // //mexPrintf("%f.%f.%f.%f\n", prob[0], prob[1],prob[2],prob[3]); // return; }
/*
 * Per-record callback that adds one token to a histogram and updates the
 * ham/spam "only" and hapax tallies.
 *
 * key       token being visited; tokens whose text starts with '.' are
 *           treated as meta-tokens and skipped
 * data      counts for the token (goodcount, spamcount)
 * userdata  pointer to the rhistogram_t being filled
 *
 * Always returns EX_OK.
 */
static ex_t ds_histogram_hook(/*@unused@*/ word_t *key, dsv_t *data, void *userdata)
{
    rhistogram_t *histogram = (rhistogram_t *)userdata;
    double score = calc_prob(data->goodcount, data->spamcount, mgood, mbad);
    /* scale the score to a bucket index, clamped to the last bucket */
    uint bucket = min(score * INTERVALS, INTERVALS-1);

    /* meta-tokens are not counted */
    if (*key->u.text == (byte) '.')
        return EX_OK;

    histogram->count[bucket] += 1;

    /* token never seen in spam */
    if (data->spamcount == 0) {
        ham_only += 1;
        if (data->goodcount == 1)
            ham_hapax += 1;
    }

    /* token never seen in ham */
    if (data->goodcount == 0) {
        spam_only += 1;
        if (data->spamcount == 1)
            spam_hapax += 1;
    }

    return EX_OK;
}
/*
 * Walk every n-gram of lm, score it against lm1 and lm2, and store
 * log10(w1*p1 + w2*p2) in the slot of lm->probs the browse cursor points at.
 *
 * While walking, the two component probabilities are also summed into
 * running totals that are cleared when the cursor position one order below
 * differs from the position recorded when browsing of this order began
 * (orders >= 2), or when the unigram cursor reaches lm->num_kgrams[0]
 * (order 1). The totals are never read inside this function.
 */
void check_prob(arpa_lm_t *lm, arpa_lm_t *lm1, arpa_lm_t *lm2,fb_info* fb1,fb_info* fb2,double w1,double w2)
{
    TBROWSE browse;
    id__t ngram_ids[MAX_K];
    char **ngram_words;
    int order;
    int seen;
    double prob_first, prob_second, mixed;
    double total_first, total_second;
    int context_pos = 0;   /* initialised only to keep the compiler quiet */

    ngram_words = (char **)NewArray(MAX_K, MAX_WORD, sizeof(char));

    for (order = 1; order <= lm->n; order++) {
        begin_browse(lm, order, &browse);
        if (order >= 2)
            context_pos = browse.pos[order - 2];

        total_first = 0;
        total_second = 0;
        seen = 0;
        printf("\nProcessing %d-gram\n", order);

        while (get_next_ngram(ngram_ids, &browse)) {
            ids2words(ngram_words, ngram_ids, order, lm->vocab);
            prob_first = calc_prob(ngram_words, order, lm1, fb1);
            prob_second = calc_prob(ngram_words, order, lm2, fb2);

            seen++;
            show_dot(seen);

            total_first += prob_first;
            total_second += prob_second;

            if (order >= 2 && browse.pos[order - 2] != context_pos) {
                total_first = 0;
                total_second = 0;
            } else if (order == 1 && browse.pos[0] == lm->num_kgrams[0]) {
                total_first = 0;
                total_second = 0;
            }

            mixed = w1 * prob_first + w2 * prob_second;
            lm->probs[order - 1][browse.pos[order - 1] - 1] = log10(mixed);
        }
    }

    DeleteArray(ngram_words);
}
/*
 * Write the per-token statistics table to fpo.
 *
 * A header row is printed first (with two extra columns, "invfwlog" and
 * "fwlog", when Rtable is set), then one row per list node following
 * rstats_head, then the summary produced by msg_print_summary().
 *
 * Each row shows the quoted token, good+bad count, the good and bad
 * ratios (dashes when both message counts are zero), the calc_prob()
 * score, optionally log(1 - fw) and log(fw), and a flag character:
 * '+' used, '-' not used, 'i' when both message counts are zero.
 */
static void rstats_print_rtable(rstats_t *rstats_head)
{
    const char *pfx;
    rstats_t *node;

    if (!stats_in_header)
        pfx = "";
    else
        pfx = " ";

    /* header row */
    if (Rtable) {
        (void)fprintf(fpo, "%s%*s %6s %-6s %-6s %-6s %-6s %-6s %s\n",
                      pfx, max_token_len+2, "", "n", "pgood", "pbad", "fw",
                      "invfwlog", "fwlog", "U");
    } else {
        (void)fprintf(fpo, "%s%*s %6s %-6s %-6s %-6s %s\n",
                      pfx, max_token_len+2, "", "n", "pgood", "pbad", "fw", "U");
    }

    /* one row per token */
    node = rstats_head->next;
    while (node != NULL) {
        int pad;
        double fw;
        char flag;

        /* padding that aligns the columns after the token text */
        if (node->token->leng >= max_token_len)
            pad = 0;
        else
            pad = max_token_len - node->token->leng;

        fw = calc_prob(node->good, node->bad, node->msgs_good, node->msgs_bad);

        if (node->used)
            flag = '+';
        else
            flag = '-';

        (void)fprintf(fpo, "%s\"", pfx);
        (void)word_puts(node->token, 0, fpo);

        if (node->msgs_good != 0 || node->msgs_bad != 0) {
            (void)fprintf(fpo, "\"%*s %6lu %8.6f %8.6f %8.6f",
                          pad, " ",
                          (unsigned long)(node->good + node->bad),
                          (double)node->good / node->msgs_good,
                          (double)node->bad / node->msgs_bad,
                          fw);
        } else {
            /* no message counts at all: ratios are shown as dashes */
            flag = 'i';
            (void)fprintf(fpo, "\"%*s %6lu %8s %8s %8.6f",
                          pad, " ",
                          (unsigned long)(node->good + node->bad),
                          "--------", "--------",
                          fw);
        }

        if (Rtable)
            (void)fprintf(fpo, "%s%10.5f%10.5f", pfx, log(1.0 - fw), log(fw));

        (void)fprintf(fpo, " %c\n", flag);

        node = node->next;
    }

    /* trailer */
    msg_print_summary(pfx);
}
//get coding from data and GMM; process_model3(data,GMM,coding); void process_mode3(const mxArray *mxfea, const mxArray *mxGMM, mxArray *output[]) { double *fea = (double *)mxGetPr(mxfea); const int *dims = mxGetDimensions(mxfea); nDim = dims[0]; nSample = dims[1]; double *gmmMu = (double *)mxGetPr(mxGetField(mxGMM, 0, "Mu")); double *gmmSigma = (double *)mxGetPr(mxGetField(mxGMM, 0, "Sigma")); // //mexPrintf("\n %f %f %f.", gmmSigma[0],gmmSigma[1],gmmSigma[2]); double *gmmPriors = (double *)mxGetPr(mxGetField(mxGMM, 0, "Priors")); const int *dims2 = mxGetDimensions(mxGetField(mxGMM, 0, "Mu")); nBase = dims2[1]; int nDim2 = dims2[0]; if (nDim!=nDim2) mexErrMsgTxt("the dim of feature and gmm model is different!"); mwSize out[2]; out[0] = nSample; out[1] = nBase; double *prob = (double *)mxCalloc(nSample*nBase, sizeof(double)); calc_prob(fea, gmmMu, gmmSigma, gmmPriors, prob); #if (FIRST_ORDER) out[0] = nBase*nDim; #else out[0] = nBase*nDim*2 ; #endif out[1] = 1; mxArray *mxCoding = mxCreateNumericArray(2, out, mxDOUBLE_CLASS, mxREAL); double *coding = (double *)mxGetPr(mxCoding); // //mexPrintf("%f %f %f \n",prob[1], prob[2],prob[4]); if (nSample==0){ //mexPrintf("wrong! the coding samples is zeros!!!...... \n"); }else{ fisher_coding(fea, gmmMu, gmmSigma, gmmPriors, prob, coding); } output[0] = mxCoding; mxFree(prob); }
// get map from data, GMM, model, pos,size; void process_mode6(const mxArray *mxfea, const mxArray *mxGMM, const mxArray *mxModels, const mxArray *mxPos, const mxArray *mxMapSize, mxArray *output[]) { double *fea = (double *)mxGetPr(mxfea); const mwSize *dims = mxGetDimensions(mxfea); nDim = dims[0]; nSample = dims[1]; double *gmmMu = (double *)mxGetPr(mxGetField(mxGMM, 0, "Mu")); double *gmmSigma = (double *)mxGetPr(mxGetField(mxGMM, 0, "Sigma")); double *gmmPriors = (double *)mxGetPr(mxGetField(mxGMM, 0, "Priors")); const mwSize *dims2 = mxGetDimensions(mxGetField(mxGMM, 0, "Mu")); nBase = dims2[1]; int nDim2 = dims2[0]; if (nDim!=nDim2) mexErrMsgTxt("the dim of feature and gmm model is different!"); double *prob = (double *)mxCalloc(nSample*nBase, sizeof(double)); calc_prob(fea, gmmMu, gmmSigma, gmmPriors, prob); nModel = mxGetNumberOfElements(mxModels); double *conf= (double *)mxCalloc(nSample*nModel, sizeof(double)); #if (FIRST_ORDER) int nModelDim = nDim*nBase; #else int nModelDim = nDim*nBase*2; #endif double *model_w = (double *)mxCalloc(nModelDim*nModel, sizeof(double)); int i, j; for(i=0;i<nModel;i++){ mxArray *tempModel = mxGetCell(mxModels, i); // double *w = (double *)mxGetPr(mxGetField(tempModel, 0, "w_reshape")); double *w = (double *)mxGetPr(mxGetField(tempModel, 0, "w")); for (j=0;j<nModelDim;j++){ model_w[i*nModelDim+j] = w[j]; } } conf_model(fea, gmmMu, gmmSigma, gmmPriors, prob, model_w, conf); //mexPrintf("\n %f %d %d.", conf[1],nDim,nSample); int *mapSize = (int*) mxGetPr(mxMapSize); int nRows =mapSize[0]; int nCols = mapSize[1]; int nSample2 = mxGetN(mxPos); int *pos = (int*)mxGetPr(mxPos); int out []= {nRows, nCols, nModel}; //mexPrintf("%d %d %d \n",nModel, nRows,nCols); mxArray *mxMap = mxCreateNumericArray(3, out , mxDOUBLE_CLASS, mxREAL); double *map = mxGetPr(mxMap); map_conf(conf, pos, nRows, nCols, nModel, map); output[0] = mxMap; // mxSetPr(output[0],conf); mxFree(conf); mxFree(model_w); mxFree(prob); }
/*
 * Print the spam and good counts stored for a list of words.
 *
 * bfp               database location; bfp->filepath must be a non-empty
 *                   string
 * argc, argv        words to look up; when argc is 0 tokens are read from
 *                   stdin with get_token() until it returns non-zero
 * show_probability  when true, the message counts are fetched and an extra
 *                   column with the calc_prob() score is printed
 *
 * Output goes to fpo: a header line, then one line per word for which
 * ds_read() returns 0. Words for which it returns 1 produce no line.
 *
 * Returns EX_OK on success, EX_ERROR when the path is empty, the
 * transaction cannot be begun or finished, or a database read fails.
 * The token buffer and any allocated token are released on every return
 * path.
 */
static ex_t display_words(bfpath *bfp, int argc, char **argv, bool show_probability)
{
    byte buf[BUFSIZE];
    buff_t *buff = buff_new(buf, 0, BUFSIZE);
    const byte *word;
    const char *path = bfp->filepath;

    const char *head_format = !show_probability ? "%-30s %6s %6s\n" : "%-30s %6s %6s %6s\n";
    const char *data_format = !show_probability ? "%-30s %6lu %6lu\n" : "%-30s %6lu %6lu %f\n";

    void *dsh = NULL;   /* initialize to silence bogus gcc warning */
    void *dbe;

    int rv = 0;         /* never changed below; selects abort vs. commit at finish */
    ex_t ec = EX_OK;

    dsv_t msgcnts;

    /* protect against broken stat(2) that succeeds for empty names */
    if (path == NULL || *path == '\0') {
        fprintf(stderr, "Expecting non-empty directory or file name.\n");
        buff_free(buff);    /* allocated above, must not leak on early exit */
        return EX_ERROR;
    }

    dbe = ds_init(bfp);
    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL)
        /* print error, cleanup, and exit */
        ds_open_failure(bfp, dbe);

    if (DST_OK != ds_txn_begin(dsh)) {
        ds_close(dsh);
        ds_cleanup(dbe);
        buff_free(buff);    /* allocated above, must not leak on early exit */
        fprintf(stderr, "Cannot begin transaction.\n");
        return EX_ERROR;
    }

    if (show_probability) {
        ds_get_msgcounts(dsh, &msgcnts);
        robs = ROBS;
        robx = ROBX;
    }

    /* the fourth header argument is ignored by the three-column format */
    fprintf(fpo, head_format, "", "spam", "good", " Fisher");

    while (argc >= 0) {
        dsv_t val;
        word_t *token;
        int rc;

        unsigned long spam_count;
        unsigned long good_count;
        double rob_prob = 0.0;

        if (argc == 0) {
            /* no words on the command line: take tokens from stdin */
            if (get_token(buff, stdin) != 0)
                break;
            token = &buff->t;
        } else {
            word = (const byte *) *argv++;
            /* after the last word, force the loop to end */
            if (--argc == 0)
                argc = -1;
            token = word_news((const char *)word);
        }

        rc = ds_read(dsh, token, &val);
        switch (rc) {
        case 0:
            spam_count = val.spamcount;
            good_count = val.goodcount;

            if (!show_probability)
                fprintf(fpo, data_format, token->u.text, spam_count, good_count);
            else {
                rob_prob = calc_prob(good_count, spam_count, msgcnts.goodcount, msgcnts.spamcount);
                fprintf(fpo, data_format, token->u.text, spam_count, good_count, rob_prob);
            }
            break;
        case 1:
            break;
        default:
            fprintf(stderr, "Cannot read from database.\n");
            ec = EX_ERROR;
            /* release a token made by word_news() before leaving the loop */
            if (token != &buff->t)
                word_free(token);
            goto finish;
        }

        if (token != &buff->t)
            word_free(token);
    }

finish:
    if ((DST_OK != rv) ? ds_txn_abort(dsh) : ds_txn_commit(dsh)) {
        fprintf(stderr, "Cannot %s transaction.\n", rv ? "abort" : "commit");
        ec = EX_ERROR;
    }
    ds_close(dsh);
    ds_cleanup(dbe);

    buff_free(buff);

    return ec;
}