/**
 * Read and validate the header of the senone score file acmod->insenfh.
 *
 * The "n_sen" attribute must equal the senone count of acmod->mdef, and the
 * "logbase" attribute must be within 0.001 of the base of acmod->lmath.
 * On success the byte-swap flag reported by bio_readhdr() is stored in
 * acmod->insen_swap.
 *
 * @param acmod acoustic model whose insenfh stream holds the header
 * @return 0 on success, -1 if the header cannot be read or does not match
 */
static int
acmod_read_senfh_header(acmod_t *acmod)
{
    /* Start out NULL so the cleanup path never hands indeterminate
     * pointers to bio_hdrarg_free() when bio_readhdr() itself fails. */
    char **name = NULL;
    char **val = NULL;
    int32 swap;
    int rv = -1;
    int i;

    if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
        goto done;

    for (i = 0; name[i] != NULL; ++i) {
        if (!strcmp(name[i], "n_sen")) {
            if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
                E_ERROR("Number of senones in senone file (%d) does not "
                        "match mdef (%d)\n", atoi(val[i]),
                        bin_mdef_n_sen(acmod->mdef));
                goto done;
            }
        }
        if (!strcmp(name[i], "logbase")) {
            if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
                E_ERROR("Logbase in senone file (%f) does not match acmod "
                        "(%f)\n", atof_c(val[i]),
                        logmath_get_base(acmod->lmath));
                goto done;
            }
        }
    }

    acmod->insen_swap = swap;
    rv = 0;

done:
    /* Only release header storage that was actually handed back to us. */
    if (name != NULL)
        bio_hdrarg_free(name, val);
    return rv;
}
/*
 * Load one mixture-Gaussian parameter file into a 4-D table indexed
 * [codebook][feature stream][density][dimension].
 *
 * The file supplies the codebook, stream and density counts, the vector
 * length of every stream, and then one flat run of floats.  If *out_param is
 * NULL on entry the table and its backing buffer are allocated here;
 * otherwise the floats are read into the storage the existing table already
 * points at.  Every failure is reported through E_FATAL / E_FATAL_SYSTEM.
 *
 * Always returns 0.
 */
static int32
gauden_param_read(float32 ***** out_param,      /* Alloc space iff *out_param == 0 */
                  int32 * out_n_mgau,
                  int32 * out_n_feat,
                  int32 * out_n_density,
                  int32 ** out_veclen,
                  const char *file_name)
{
    FILE *fh;
    char **hdr_name, **hdr_val;
    char extra;
    int32 swap;
    int32 have_chksum = 0;
    uint32 chksum = 0;
    int32 n_cb, n_stream, n_den;
    int32 n_floats, total_len;
    int32 *stream_len;
    int32 a, cb, st, d, off;
    float32 ****param;
    float32 *data;

    E_INFO("Reading mixture gaussian parameter: %s\n", file_name);

    fh = fopen(file_name, "rb");
    if (fh == 0)
        E_FATAL_SYSTEM("Failed to open file '%s' for reading", file_name);

    /* Header: attribute/value pairs followed by the byte-order magic */
    if (bio_readhdr(fh, &hdr_name, &hdr_val, &swap) < 0)
        E_FATAL("Failed to read header from file '%s'\n", file_name);

    /* Only "version" and "chksum0" are of interest */
    for (a = 0; hdr_name[a]; a++) {
        if (strcmp(hdr_name[a], "version") == 0) {
            if (strcmp(hdr_val[a], GAUDEN_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[a], GAUDEN_PARAM_VERSION);
        }
        else if (strcmp(hdr_name[a], "chksum0") == 0) {
            /* The value itself is ignored */
            have_chksum = 1;
        }
    }
    bio_hdrarg_free(hdr_name, hdr_val);

    /* Number of codebooks */
    if (bio_fread(&n_cb, sizeof(int32), 1, fh, swap, &chksum) != 1)
        E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
    *out_n_mgau = n_cb;

    /* Number of feature streams per codebook */
    if (bio_fread(&n_stream, sizeof(int32), 1, fh, swap, &chksum) != 1)
        E_FATAL("fread(%s) (#features) failed\n", file_name);
    *out_n_feat = n_stream;

    /* Number of densities per stream in each codebook */
    if (bio_fread(&n_den, sizeof(int32), 1, fh, swap, &chksum) != 1)
        E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
    *out_n_density = n_den;

    /* Dimensionality of every feature stream */
    stream_len = (int32 *) ckd_calloc(n_stream, sizeof(int32));
    *out_veclen = stream_len;
    if (bio_fread(stream_len, sizeof(int32), n_stream, fh, swap, &chksum)
        != n_stream)
        E_FATAL("fread(%s) (feature-lengths) failed\n", file_name);

    /* Combined vector length over all streams */
    total_len = 0;
    for (st = 0; st < n_stream; st++)
        total_len += stream_len[st];

    /* Float count announced for the whole set of codebooks */
    if (bio_fread(&n_floats, sizeof(int32), 1, fh, swap, &chksum) != 1)
        E_FATAL("fread(%s) (total #floats) failed\n", file_name);
    if (n_floats != n_cb * n_den * total_len) {
        E_FATAL
            ("%s: #mfcc_ts(%d) doesn't match dimensions: %d x %d x %d\n",
             file_name, n_floats, n_cb, n_den, total_len);
    }

    if (*out_param) {
        /* Caller supplied storage: reuse its table and backing buffer */
        param = (float32 ****) *out_param;
        data = param[0][0][0];
    }
    else {
        /* Build the pointer table over one contiguous float buffer */
        param = (float32 ****) ckd_calloc_3d(n_cb, n_stream, n_den,
                                             sizeof(float32 *));
        data = (float32 *) ckd_calloc(n_floats, sizeof(float32));
        off = 0;
        for (cb = 0; cb < n_cb; cb++) {
            for (st = 0; st < n_stream; st++) {
                for (d = 0; d < n_den; d++) {
                    param[cb][st][d] = data + off;
                    off += stream_len[st];
                }
            }
        }
    }

    /* Density data proper */
    if (bio_fread(data, sizeof(float32), n_floats, fh, swap, &chksum)
        != n_floats)
        E_FATAL("fread(%s) (densitydata) failed\n", file_name);

    if (have_chksum)
        bio_verify_chksum(fh, swap, chksum);

    /* Anything left in the file is an error */
    if (fread(&extra, 1, 1, fh) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fh);

    *out_param = param;

    E_INFO("%d codebook, %d feature, size: \n", n_cb, n_stream);
    for (st = 0; st < n_stream; st++)
        E_INFO(" %dx%d\n", n_den, stream_len[st]);

    return 0;
}
/**
 * Load an LDA transform matrix file into feat->lda.
 *
 * Any previously loaded transform is released first.  The file supplies a
 * 3-D float32 array; its first dimension is stored in feat->n_lda and its
 * last dimension must equal feat->stream_len[0] (fatal otherwise).
 *
 * @param feat    feature module; must have exactly one stream
 * @param ldafile path of the transform file
 * @param dim     requested output dimensionality; replaced by the matrix
 *                row count when it is <= 0 or larger than that count
 * @return 0 on success, -1 if the feature module is multi-stream or the
 *         file cannot be opened or read
 */
int32
feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
{
    FILE *fh;
    int32 byteswap, chksum_present;
    uint32 chksum, i, m, n;
    char **argname, **argval;

    assert(feat);
    if (feat->n_stream != 1) {
        E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
                feat->n_stream);
        return -1;
    }

    if ((fh = fopen(ldafile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile);
        return -1;
    }

    if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
        E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
        fclose(fh);
        return -1;
    }

    /* NOTE(review): chksum0 is detected here but the checksum is never
     * verified in this function - confirm whether that is intentional. */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
                E_WARN("%s: Version mismatch: %s, expecting %s\n",
                       ldafile, argval[i], MATRIX_FILE_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }

    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    if (feat->lda) {
        ckd_free_3d((void ***)feat->lda);
        /* Clear the pointer so a failed read below does not leave the
         * feature module holding freed memory. */
        feat->lda = NULL;
    }

    {
        /* Use a temporary variable to avoid strict-aliasing problems. */
        void ***outlda;

        if (bio_fread_3d(&outlda, sizeof(float32),
                         &feat->n_lda, &m, &n,
                         fh, byteswap, &chksum) < 0) {
            E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
            fclose(fh);
            return -1;
        }
        feat->lda = (void *)outlda;
    }
    fclose(fh);

#ifdef FIXED_POINT
    /* FIXME: This is a fragile hack that depends on mfcc_t and
     * float32 being the same size (which they are, but...) */
    for (i = 0; i < feat->n_lda * m * n; ++i) {
        feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]);
    }
#endif

    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
    if (n != feat->stream_len[0])
        E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n",
                n, feat->stream_len[0]);

    /* Override dim from file if it is 0 or greater than m. */
    if (dim > m || dim <= 0) {
        dim = m;
    }
    feat->out_dim = dim;

    return 0;
}
/*
 * Read the senone -> Gaussian-codebook map file into s->mgau.
 *
 * When the header's version string parses to a value greater than 1.1 the
 * file carries an explicit codebook count, which is read into s->n_gauden;
 * otherwise the count is inferred as (largest map entry + 1), with a
 * minimum of 1.  s->n_sen is set from the length of the map array.
 * Every failure is reported through E_FATAL / E_FATAL_SYSTEM.
 *
 * Always returns 1.
 */
static int32
senone_mgau_map_read(senone_t * s, char const *file_name)
{
    FILE *fp;
    int32 byteswap, chksum_present, n_gauden_present;
    uint32 chksum;
    int32 i;
    char eofchk;
    char **argname, **argval;
    void *ptr;
    float32 v;

    E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("Failed to read header from file '%s'\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    n_gauden_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], SPDEF_PARAM_VERSION);
            }

            /* HACK!! Convert version# to float32 and take appropriate action */
            if (sscanf(argval[i], "%f", &v) != 1)
                E_FATAL("%s: Bad version no. string: %s\n",
                        file_name, argval[i]);

            n_gauden_present = (v > 1.1) ? 1 : 0;
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #gauden (if version matches) */
    if (n_gauden_present) {
        E_INFO("Reading number of codebooks from %s\n", file_name);
        if (bio_fread
            (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
            E_FATAL("fread(%s) (#gauden) failed\n", file_name);
    }

    /* Read 1d array data */
    if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp,
                     byteswap, &chksum) < 0) {
        E_FATAL("bio_fread_1d(%s) failed\n", file_name);
    }
    s->mgau = ptr;

    /* Infer n_gauden if not present in this version */
    if (!n_gauden_present) {
        s->n_gauden = 1;
        for (i = 0; i < s->n_sen; i++)
            if (s->mgau[i] >= s->n_gauden)
                s->n_gauden = s->mgau[i] + 1;
    }

    /* Report only once n_gauden is final, so the count shown is the real
     * one even for files that carry no explicit codebook count. */
    E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s: %d\n", file_name, eofchk);

    fclose(fp);

    E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
           s->n_gauden);

    return 1;
}
/*
 * Read the senone mixture-weight file `file_name` into s->pdf.
 *
 * Steps visible below: open the file and parse its header ("version" is
 * compared with MIXW_PARAM_VERSION, "chksum0" enables checksumming); read
 * s->n_sen, s->n_feat, s->n_cw and the announced float count and check that
 * the count equals their product; require s->mixwfloor to lie strictly
 * between 0 and 1; allocate s->pdf as [n_sen][n_feat][n_cw] when
 * s->n_gauden > 1, or transposed as [n_feat][n_cw][n_sen] otherwise.
 * Each weight vector is then normalized, floored at s->mixwfloor,
 * renormalized, converted with -logmath_log(), rounded, shifted right by
 * SENSCR_SHIFT and clamped to 255 before being stored.
 * Open/read/validation failures are reported through E_FATAL /
 * E_FATAL_SYSTEM; n_err counts vectors whose first normalization returned
 * a value <= 0.
 */
static int32 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath) { char eofchk; FILE *fp; int32 byteswap, chksum_present; uint32 chksum; float32 *pdf; int32 i, f, c, p, n_err; char **argname, **argval; E_INFO("Reading senone mixture weights: %s\n", file_name); if ((fp = fopen(file_name, "rb")) == NULL) E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name); /* Read header, including argument-value info and 32-bit byteorder magic */ if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) E_FATAL("Failed to read header from file '%s'\n", file_name); /* Parse argument-value list */ chksum_present = 0; for (i = 0; argname[i]; i++) { if (strcmp(argname[i], "version") == 0) { if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0) E_WARN("Version mismatch(%s): %s, expecting %s\n", file_name, argval[i], MIXW_PARAM_VERSION); } else if (strcmp(argname[i], "chksum0") == 0) { chksum_present = 1; /* Ignore the associated value */ } } bio_hdrarg_free(argname, argval); argname = argval = NULL; chksum = 0; /* Read #senones, #features, #codewords, arraysize */ if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 1) || (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum) != 1) || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum) != 1) || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); } if (i != s->n_sen * s->n_feat * s->n_cw) { E_FATAL ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n", file_name, i, s->n_sen, s->n_feat, s->n_cw); } /* * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits. * All PDF values will be truncated (in the LSB positions) by these many bits. */ if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0)) E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor); /* Use a fixed shift for compatibility with everything else. 
*/ E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT); /* * Allocate memory for senone PDF data. Organize normally or transposed depending on * s->n_gauden. */ if (s->n_gauden > 1) { E_INFO("Not transposing mixture weights in memory\n"); s->pdf = (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw, sizeof(senprob_t)); } else { E_INFO("Transposing mixture weights in memory\n"); s->pdf = (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen, sizeof(senprob_t)); } /* Temporary structure to read in floats */ pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32)); /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ n_err = 0; for (i = 0; i < s->n_sen; i++) { for (f = 0; f < s->n_feat; f++) { if (bio_fread ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap, &chksum) != s->n_cw) { E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); } /* Normalize and floor */ if (vector_sum_norm(pdf, s->n_cw) <= 0.0) n_err++; vector_floor(pdf, s->n_cw, s->mixwfloor); vector_sum_norm(pdf, s->n_cw); /* Convert to logs3, truncate to 8 bits, and store in s->pdf */ for (c = 0; c < s->n_cw; c++) { p = -(logmath_log(lmath, pdf[c])); p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */ if (s->n_gauden > 1) s->pdf[i][f][c] = (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; else s->pdf[f][c][i] = (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; } } }
/*
 * Build a tmat_t from an HMM transition-probability file.
 *
 * The file supplies the number of matrices, the number of source and
 * destination states (destination must be source + 1) and one float32 matrix
 * per tmat.  Each row is normalized, floored with vector_nz_floor() using
 * tpfloor, renormalized and converted with logs3(); entries that are still
 * exactly 0.0 become S3_LOGPROB_ZERO.  Open, read and validation failures
 * are reported through E_FATAL / E_FATAL_SYSTEM.
 *
 * Returns the newly allocated table.
 */
tmat_t *
tmat_init (char *file_name, float64 tpfloor)
{
    tmat_t *tmat;
    FILE *fh;
    char **hdr_name, **hdr_val;
    char extra;
    int32 swap;
    int32 have_chksum = 0;
    uint32 chksum = 0;
    int32 n_from, n_to;
    int32 n_val, mat_floats;
    int32 a, m, src, dst;
    float32 **rows;

    E_INFO("Reading HMM transition probability matrices: %s\n", file_name);

    tmat = (tmat_t *) ckd_calloc (1, sizeof(tmat_t));

    fh = fopen(file_name, "rb");
    if (fh == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: attribute/value pairs followed by the byte-order magic */
    if (bio_readhdr (fh, &hdr_name, &hdr_val, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Only "version" and "chksum0" are of interest */
    for (a = 0; hdr_name[a]; a++) {
        if (strcmp (hdr_name[a], "version") == 0) {
            if (strcmp(hdr_val[a], TMAT_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[a], TMAT_PARAM_VERSION);
        }
        else if (strcmp (hdr_name[a], "chksum0") == 0) {
            /* The value itself is ignored */
            have_chksum = 1;
        }
    }
    bio_hdrarg_free (hdr_name, hdr_val);

    /* Counts: #tmat, #from-states, #to-states, total #floats */
    if ((bio_fread (&(tmat->n_tmat), sizeof(int32), 1, fh, swap, &chksum) != 1) ||
        (bio_fread (&n_from, sizeof(int32), 1, fh, swap, &chksum) != 1) ||
        (bio_fread (&n_to, sizeof(int32), 1, fh, swap, &chksum) != 1) ||
        (bio_fread (&n_val, sizeof(int32), 1, fh, swap, &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }

    if (tmat->n_tmat >= MAX_S3TMATID)
        E_FATAL("%s: #tmat (%d) exceeds limit (%d)\n",
                file_name, tmat->n_tmat, MAX_S3TMATID);
    if (n_to != n_from + 1)
        E_FATAL("%s: #from-states(%d) != #to-states(%d)-1\n",
                file_name, n_from, n_to);
    tmat->n_state = n_from;

    if (n_val != tmat->n_tmat * n_from * n_to) {
        E_FATAL("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, n_val, tmat->n_tmat, n_from, n_to);
    }

    /* Destination table, plus a scratch matrix for the raw floats */
    tmat->tp = (int32 ***) ckd_calloc_3d (tmat->n_tmat, n_from, n_to,
                                          sizeof(int32));
    rows = (float32 **) ckd_calloc_2d (n_from, n_to, sizeof(float32));

    mat_floats = n_from * n_to;
    for (m = 0; m < tmat->n_tmat; m++) {
        if (bio_fread (rows[0], sizeof(float32), mat_floats, fh,
                       swap, &chksum) != mat_floats) {
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        }

        for (src = 0; src < n_from; src++) {
            float32 *row = rows[src];

            /* Normalize, floor the non-zero entries, normalize again */
            if (vector_sum_norm (row, n_to) == 0.0)
                E_ERROR("Normalization failed for tmat %d from state %d\n",
                        m, src);
            vector_nz_floor (row, n_to, tpfloor);
            vector_sum_norm (row, n_to);

            /* A probability of exactly zero gets the dedicated constant */
            for (dst = 0; dst < n_to; dst++) {
                if (row[dst] == 0.0)
                    tmat->tp[m][src][dst] = S3_LOGPROB_ZERO;
                else
                    tmat->tp[m][src][dst] = logs3(row[dst]);
            }
        }
    }

    ckd_free_2d ((void **) rows);

    if (have_chksum)
        bio_verify_chksum (fh, swap, chksum);

    /* Trailing bytes are reported but not fatal */
    if (fread (&extra, 1, 1, fh) == 1)
        E_ERROR("Non-empty file beyond end of data\n");

    fclose(fh);

    E_INFO("Read %d transition matrices of size %dx%d\n",
           tmat->n_tmat, tmat->n_state, tmat->n_state+1);

    if (tmat_chk_uppertri (tmat) < 0)
        E_FATAL("Tmat not upper triangular\n");

    return tmat;
}
/*
 * Load per-senone interpolation weights from file_name into ip.
 *
 * The file holds a senone count followed by one float per senone, each of
 * which must lie in [0, 1].  For weight f, ip->wt[i].cd receives logs3(f)
 * and ip->wt[i].ci receives logs3(1 - f); when the argument would be exactly
 * zero, S3_LOGPROB_ZERO is stored instead.  Every failure is reported
 * through E_FATAL / E_FATAL_SYSTEM.
 *
 * Always returns 1.
 */
static int32
interp_read(interp_t * ip, const char *file_name)
{
    FILE *fh;
    char **hdr_name, **hdr_val;
    char extra;
    int32 swap;
    int32 have_chksum = 0;
    uint32 chksum = 0;
    int32 idx;
    float w;

    E_INFO("Reading interpolation weights: %s\n", file_name);

    fh = fopen(file_name, "rb");
    if (fh == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: attribute/value pairs followed by the byte-order magic */
    if (bio_readhdr(fh, &hdr_name, &hdr_val, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Only "version" and "chksum0" are of interest */
    for (idx = 0; hdr_name[idx]; idx++) {
        if (strcmp(hdr_name[idx], "version") == 0) {
            if (strcmp(hdr_val[idx], INTERP_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[idx], INTERP_VERSION);
        }
        else if (strcmp(hdr_name[idx], "chksum0") == 0) {
            /* The value itself is ignored */
            have_chksum = 1;
        }
    }
    bio_hdrarg_free(hdr_name, hdr_val);

    /* Senone count */
    if (bio_fread(&(ip->n_sen), sizeof(int32), 1, fh, swap, &chksum) != 1)
        E_FATAL("fread(%s) (arraysize) failed\n", file_name);
    if (ip->n_sen <= 0)
        E_FATAL("%s: arraysize= %d in header\n", file_name, ip->n_sen);

    ip->wt = (struct interp_wt_s *) ckd_calloc(ip->n_sen,
                                               sizeof(struct interp_wt_s));

    for (idx = 0; idx < ip->n_sen; idx++) {
        if (bio_fread(&w, sizeof(float32), 1, fh, swap, &chksum) != 1)
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        if ((w < 0.0) || (w > 1.0))
            E_FATAL("%s: interpolation weight(%d)= %e\n", file_name, idx, w);

        /* CD side: a weight of exactly 0 has no finite log */
        if (w == 0.0)
            ip->wt[idx].cd = S3_LOGPROB_ZERO;
        else
            ip->wt[idx].cd = logs3(ip->logmath, w);

        /* CI side: the complementary weight, with the same special case */
        if (w == 1.0)
            ip->wt[idx].ci = S3_LOGPROB_ZERO;
        else
            ip->wt[idx].ci = logs3(ip->logmath, 1.0 - w);
    }

    if (have_chksum)
        bio_verify_chksum(fh, swap, chksum);

    /* Anything left in the file is an error */
    if (fread(&extra, 1, 1, fh) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fh);

    E_INFO("Read %d interpolation weights\n", ip->n_sen);

    return 1;
}
/*
 * Read a Sphinx3 mean or variance file.
 *
 * The file must describe exactly one codebook.  Its stream count, density
 * count and per-stream vector lengths are stored into s->n_feat,
 * s->n_density and s->veclen[] when those are still 0, and must match them
 * otherwise.  *out_cb receives a newly allocated array with one float32
 * buffer per feature stream, each holding n_density * veclen[i] values.
 *
 * Returns the total number of floats announced by the file, or -1 when the
 * file holds more or fewer than one codebook.  All other failures are
 * reported through E_FATAL.
 */
static int32
s3_read_mgau(s2_semi_mgau_t *s, const char *file_name, float32 ***out_cb)
{
    char tmp;
    FILE *fp;
    int32 i, blk, n;
    int32 n_mgau;
    int32 n_feat;
    int32 n_density;
    int32 *veclen;
    int32 byteswap, chksum_present;
    char **argname, **argval;
    uint32 chksum;

    E_INFO("Reading S3 mixture gaussian file '%s'\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MGAU_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MGAU_PARAM_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* #Codebooks */
    if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
    if (n_mgau != 1) {
        E_ERROR("%s: #codebooks (%d) != 1\n", file_name, n_mgau);
        fclose(fp);
        return -1;
    }

    /* #Features/codebook */
    if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#features) failed\n", file_name);
    if (s->n_feat == 0)
        s->n_feat = n_feat;
    else if (n_feat != s->n_feat)
        E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);

    /* #Gaussian densities/feature in each codebook */
    if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
    if (s->n_density == 0)
        s->n_density = n_density;
    else if (n_density != s->n_density)
        /* Report the density count that was actually compared */
        E_FATAL("%s: Number of densities per feature(%d) != %d\n",
                file_name, n_density, s->n_density);

    /* Vector length of feature stream */
    veclen = ckd_calloc(s->n_feat, sizeof(int32));
    if (bio_fread(veclen, sizeof(int32), s->n_feat, fp, byteswap, &chksum)
        != s->n_feat)
        E_FATAL("fread(%s) (feature vector-length) failed\n", file_name);
    for (i = 0, blk = 0; i < s->n_feat; ++i) {
        if (s->veclen[i] == 0)
            s->veclen[i] = veclen[i];
        else if (veclen[i] != s->veclen[i])
            E_FATAL("feature stream length %d is inconsistent (%d != %d)\n",
                    i, veclen[i], s->veclen[i]);
        blk += veclen[i];
    }

    /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
    if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (total #floats) failed\n", file_name);
    if (n != n_mgau * n_density * blk)
        E_FATAL
            ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
             file_name, n, n_mgau, n_density, blk);

    /* One contiguous float buffer per feature stream */
    *out_cb = ckd_calloc(s->n_feat, sizeof(float32 *));
    for (i = 0; i < s->n_feat; ++i) {
        (*out_cb)[i] =
            (float32 *) ckd_calloc(n_density * veclen[i], sizeof(float32));
        if (bio_fread
            ((*out_cb)[i], sizeof(float32),
             n_density * veclen[i], fp,
             byteswap, &chksum) != n_density * veclen[i])
            E_FATAL("fread(%s, %d) of feat %d failed\n", file_name,
                    n_density * veclen[i], i);
    }
    ckd_free(veclen);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&tmp, 1, 1, fp) == 1)
        E_FATAL("%s: More data than expected\n", file_name);

    fclose(fp);

    E_INFO("%d mixture Gaussians, %d components, %d feature streams, veclen %d\n",
           n_mgau, n_density, n_feat, blk);

    return n;
}
/*
 * Read a mixture-weight file into the quantized table s->mixw.
 *
 * The file supplies the senone, feature-stream and component counts and one
 * float32 weight vector per (senone, stream).  Each vector is normalized,
 * floored at SmoothMin, renormalized, converted with
 * -logmath_log(s->lmath_8b, ...), clamped to MAX_NEG_MIXW and stored
 * transposed as s->mixw[stream][component][senone].  s->n_sen is set from
 * the file.  Every failure is reported through E_FATAL.
 *
 * Returns the number of senones read.
 */
static int32
read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
{
    char **argname, **argval;
    char eofchk;
    FILE *fp;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 *pdf;
    int32 i, f, c, n;
    int32 n_sen;
    int32 n_feat;
    int32 n_comp;
    int32 n_err;

    E_INFO("Reading mixture weights file '%s'\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MGAU_MIXW_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones, #features, #codewords, arraysize */
    if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
            1)
        || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
            1)
        || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (n_feat != s->n_feat)
        E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
    if (n != n_sen * n_feat * n_comp) {
        /* Report the float count from the file, not the stale loop index */
        E_FATAL
            ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
             file_name, n, n_sen, n_feat, n_comp);
    }
    /* s->mixw is sized by s->n_density but indexed by component below, so a
     * larger component count in the file would write past the allocation. */
    if (n_comp > s->n_density)
        E_FATAL("%s: #mixture components(%d) exceeds #densities(%d)\n",
                file_name, n_comp, s->n_density);

    /* n_sen = number of mixture weights per codeword, which is
     * fixed at the number of senones since we have only one codebook.
     */
    s->n_sen = n_sen;

    /* Quantized mixture weight arrays. */
    s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));

    /* Temporary structure to read in floats before conversion to (int32) logs3 */
    pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));

    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
    n_err = 0;
    for (i = 0; i < n_sen; i++) {
        for (f = 0; f < n_feat; f++) {
            if (bio_fread((void *) pdf, sizeof(float32),
                          n_comp, fp, byteswap, &chksum) != n_comp) {
                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
            }

            /* Normalize and floor */
            if (vector_sum_norm(pdf, n_comp) <= 0.0)
                n_err++;
            vector_floor(pdf, n_comp, SmoothMin);
            vector_sum_norm(pdf, n_comp);

            /* Convert to LOG, quantize, and transpose */
            for (c = 0; c < n_comp; c++) {
                int32 qscr;

                qscr = -logmath_log(s->lmath_8b, pdf[c]);
                if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
                    qscr = MAX_NEG_MIXW;
                s->mixw[f][c][i] = qscr;
            }
        }
    }
    if (n_err > 0)
        E_WARN("Weight normalization failed for %d senones\n", n_err);

    ckd_free(pdf);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
    return n_sen;
}
/**
 * Read a regression map file.
 *
 * The header must carry a "version" attribute equal to MAP_FILE_VERSION
 * (fatal otherwise).  The range size is read into *out_n_rng and the map
 * array, allocated by bio_fread_1d(), into *out_map with its length in
 * *out_n_dom.  When the header has a "chksum0" attribute the stored checksum
 * is compared with the one computed while reading (fatal on mismatch).
 *
 * @return S3_SUCCESS on success, S3_ERROR if a read fails.  The file is
 *         closed on both paths.
 */
static int
s3map_read(const char *fn,      /**< The file name */
           void **out_map,
           int32 * out_n_dom,
           int32 * out_n_rng,
           size_t map_elem_size)
{
    uint32 rd_chksum = 0;
    uint32 sv_chksum;
    uint32 ignore = 0;          /* accumulator bio_fread() updates; value unused */
    int have_ver = 0;
    int do_chk = 0;
    FILE *fp;
    int32 swap;
    char **argname, **argval;
    int i;

    if ((fp = fopen(fn, "rb")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", fn);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", fn);

    /* Parse argument-value list.  Only flags are kept: the strings are
     * released below, so pointers into them must not outlive that call. */
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MAP_FILE_VERSION) != 0) {
                E_FATAL("Version mismatch(%s): %s, expecting %s\n",
                        fn, argval[i], MAP_FILE_VERSION);
            }
            have_ver = 1;
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            do_chk = 1;
        }
    }
    if (!have_ver)
        E_FATAL("No version attribute for %s\n", fn);
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    if (bio_fread(out_n_rng, sizeof(uint32), 1, fp, swap, &rd_chksum) != 1) {
        fclose(fp);
        return S3_ERROR;
    }

    if (bio_fread_1d(out_map,
                     map_elem_size, out_n_dom, fp, swap, &rd_chksum) < 0) {
        fclose(fp);
        return S3_ERROR;
    }

    if (do_chk) {
        if (bio_fread(&sv_chksum, sizeof(uint32), 1, fp, swap, &ignore) != 1) {
            fclose(fp);
            return S3_ERROR;
        }

        if (sv_chksum != rd_chksum) {
            E_FATAL("Checksum error; read corrupted data.\n");
        }
    }

    /* The success path used to return with the stream still open */
    fclose(fp);

    E_INFO("Read %s [%u mappings to %u]\n", fn, *out_n_dom, *out_n_rng);

    return S3_SUCCESS;
}
/*
 * Load the senone -> codebook map file into s->sen2mgau.
 *
 * When the header's version string parses to a value greater than 1.1 the
 * file carries an explicit codebook count, read into s->n_mgau; otherwise
 * the count is inferred as (largest map entry + 1), with a minimum of 1.
 * s->n_sen is set from the length of the map array.  Both counts are checked
 * against MAX_SENID / MAX_MGAUID and every map entry is range-checked.
 * Every failure is reported through E_FATAL / E_FATAL_SYSTEM.
 *
 * Always returns 0.
 */
static int32
senone_mgau_map_read (senone_t *s, const char *file_name)
{
    FILE *fh;
    char **hdr_name, **hdr_val;
    char extra;
    int32 swap;
    int32 have_chksum = 0;
    int32 have_n_mgau = 0;
    uint32 chksum = 0;
    int32 k;
    float32 ver;

    E_INFO("Reading senone-codebook map file: %s\n", file_name);

    fh = fopen(file_name, "rb");
    if (fh == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: attribute/value pairs followed by the byte-order magic */
    if (bio_readhdr (fh, &hdr_name, &hdr_val, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    for (k = 0; hdr_name[k]; k++) {
        if (strcmp (hdr_name[k], "version") == 0) {
            if (strcmp(hdr_val[k], SPDEF_PARAM_VERSION) != 0) {
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[k], SPDEF_PARAM_VERSION);
            }

            /* HACK!! The version number, read as a float, decides whether
             * the file stores an explicit codebook count */
            if (sscanf (hdr_val[k], "%f", &ver) != 1)
                E_FATAL("%s: Bad version no. string: %s\n",
                        file_name, hdr_val[k]);

            if (ver > 1.1)
                have_n_mgau = 1;
            else
                have_n_mgau = 0;
        }
        else if (strcmp (hdr_name[k], "chksum0") == 0) {
            /* The value itself is ignored */
            have_chksum = 1;
        }
    }
    bio_hdrarg_free (hdr_name, hdr_val);

    /* Explicit codebook count, when the format provides one */
    if (have_n_mgau) {
        if (bio_fread (&(s->n_mgau), sizeof(int32), 1, fh, swap, &chksum) != 1)
            E_FATAL("fread(%s) (#gauden) failed\n", file_name);
    }

    /* Map array; bio_fread_1d allocates s->sen2mgau */
    if (bio_fread_1d ((void **)(&s->sen2mgau), sizeof(int32), &(s->n_sen),
                      fh, swap, &chksum) < 0) {
        E_FATAL("bio_fread_1d(%s) failed\n", file_name);
    }

    /* Otherwise derive the count from the largest id in the map */
    if (! have_n_mgau) {
        s->n_mgau = 1;
        for (k = 0; k < s->n_sen; k++) {
            if (s->sen2mgau[k] >= s->n_mgau)
                s->n_mgau = s->sen2mgau[k]+1;
        }
    }

    if (s->n_sen >= MAX_SENID)
        E_FATAL("%s: #senones (%d) exceeds limit (%d)\n",
                file_name, s->n_sen, MAX_SENID);
    if (s->n_mgau >= MAX_MGAUID)
        E_FATAL("%s: #gauden (%d) exceeds limit (%d)\n",
                file_name, s->n_mgau, MAX_MGAUID);

    /* Every entry must name a valid codebook */
    for (k = 0; k < s->n_sen; k++) {
        if ((s->sen2mgau[k] >= s->n_mgau) || NOT_MGAUID(s->sen2mgau[k]))
            E_FATAL("Bad sen2mgau[%d]= %d, out of range [0, %d)\n",
                    k, s->sen2mgau[k], s->n_mgau);
    }

    if (have_chksum)
        bio_verify_chksum (fh, swap, chksum);

    /* Anything left in the file is an error */
    if (fread (&extra, 1, 1, fh) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fh);

    E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen, s->n_mgau);

    return 0;
}
/*
 * Read the senone mixture-weight file into the per-codebook structures of s.
 *
 * The file supplies the senone, feature and codeword counts; the senone
 * count must agree with a previously loaded mapping (a non-zero s->n_sen on
 * entry).  If no mapping was loaded, s->sen2mgau is created here: every
 * senone maps to codebook 0 when s->n_mgau == 1, otherwise each senone maps
 * to its own codebook and s->n_mgau becomes s->n_sen.  Each weight vector is
 * normalized, floored at mixwfloor, renormalized and stored as
 * -logmath_log(logmath, w) in s->mgau2sen[m].feat_mixw[f].prob[j][c].
 * Every failure is reported through E_FATAL / E_FATAL_SYSTEM.
 *
 * Always returns 0.
 */
static int32
senone_mixw_read(logmath_t * logmath, senone_t *s, const char *file_name,
                 float64 mixwfloor)
{
    FILE *fp;
    char **argname, **argval;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 *pdf;
    int32 i, j, f, m, c, p, n_sen, n_err, n_cw, nval;
    char eofchk;
    mixw_t *fw;

    E_INFO("Reading senone mixture weights: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr (fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp (argname[i], "version") == 0) {
            if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MIXW_PARAM_VERSION);
        }
        else if (strcmp (argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free (argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones, #features, #codewords, arraysize */
    n_sen = s->n_sen;           /* remember the count from any mapping file */
    if ((bio_fread (&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 1) ||
        (bio_fread (&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum) != 1) ||
        (bio_fread (&(n_cw), sizeof(int32), 1, fp, byteswap, &chksum) != 1) ||
        (bio_fread (&nval, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if ((n_sen != 0) && (s->n_sen != n_sen))
        E_FATAL("#senones(%d) conflict with mapping file(%d)\n", s->n_sen, n_sen);
    if (s->n_sen >= MAX_SENID)
        E_FATAL("%s: #senones (%d) exceeds limit (%d)\n",
                file_name, s->n_sen, MAX_SENID);
    if (s->n_feat <= 0)
        E_FATAL("Bad #features: %d\n", s->n_feat);
    if (n_cw <= 0)
        E_FATAL("Bad #mixing-wts/senone: %d\n", n_cw);

    /* Allocate sen2mgau map if not yet done so (i.e. no explicit mapping file given */
    if (! s->sen2mgau) {
        assert ((s->n_mgau == 0) || (s->n_mgau == 1));

        s->sen2mgau = (uint32 *) ckd_calloc (s->n_sen, sizeof(int32));

        if (s->n_mgau == 1) {
            /* Semicontinuous mode; all senones map to single, shared gaussian: 0 */
            for (i = 0; i < s->n_sen; i++)
                s->sen2mgau[i] = 0;
        }
        else {
            /* Fully continuous mode; each senone maps to own parent gaussian */
            s->n_mgau = s->n_sen;
            for (i = 0; i < s->n_sen; i++)
                s->sen2mgau[i] = i;
        }
    }
    else
        assert (s->n_mgau != 0);
    if (s->n_mgau >= MAX_MGAUID)
        E_FATAL("%s: #gauden (%d) exceeds limit (%d)\n",
                file_name, s->n_mgau, MAX_MGAUID);

    if (nval != s->n_sen * s->n_feat * n_cw) {
        E_FATAL("%s: #float32 values(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, nval, s->n_sen, s->n_feat, n_cw);
    }

    /*
     * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
     * All PDF values will be truncated (in the LSB positions) by these many bits.
     */
    if ((mixwfloor <= 0.0) || (mixwfloor >= 1.0))
        E_FATAL("mixwfloor (%e) not in range (0, 1)\n", mixwfloor);

    /* Allocate memory for s->mgau2sen and senone PDF data */
    build_mgau2sen (s, n_cw);

    /* Temporary structure to read in floats */
    pdf = (float32 *) ckd_calloc (n_cw, sizeof(float32));

    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
    n_err = 0;
    for (i = 0; i < s->n_sen; i++) {
        m = s->sen2mgau[i];     /* Parent mgau */
        j = s->mgau2sen_idx[i]; /* Index of senone i within list of senones for mgau m */
        fw = s->mgau2sen[m].feat_mixw;

        for (f = 0; f < s->n_feat; f++) {
            if (bio_fread((void *)pdf, sizeof(float32), n_cw, fp,
                          byteswap, &chksum) != n_cw) {
                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
            }

            /* Normalize and floor */
            if (vector_sum_norm (pdf, n_cw) == 0.0)
                n_err++;
            vector_floor (pdf, n_cw, mixwfloor);
            vector_sum_norm (pdf, n_cw);

            /* Convert to negated log and store; no per-value debug output
             * is written to stdout here. */
            for (c = 0; c < n_cw; c++) {
                p = -logmath_log(logmath, pdf[c]);
                fw[f].prob[j][c] = p;
            }
        }
    }
    if (n_err > 0)
        E_WARN("Weight normalization failed for %d senones\n", n_err);

    ckd_free (pdf);

    if (chksum_present)
        bio_verify_chksum (fp, byteswap, chksum);

    if (fread (&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read mixture weights for %d senones: %d features x %d codewords\n",
           s->n_sen, s->n_feat, n_cw);

    return 0;
}