/**
 * Process PCM audio from a filehandle.  Assume that wtf->infh is
 * positioned just after the file header.
 *
 * Blocks of 16-bit samples are read into wtf->audio, optionally
 * byteswapped (wtf->byteswap) and reduced with mixnpick_channels()
 * when "-nchans" is greater than one.  Each block is then fed to
 * fe_process_frames() until it is used up, and every batch of frames
 * is passed to the output callback wtf->ot->output_frames.
 *
 * On the normal path the input file is closed and wtf->infh is set to
 * NULL.  On the error path the file is left open.
 *
 * @return the sum of the values returned by the output callback, or -1
 *         as soon as the callback returns a negative value.
 */
static int
decode_pcm(sphinx_wave2feat_t *wtf)
{
    int32 nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    int32 whichchan = cmd_ln_int32_r(wtf->config, "-whichchan");
    uint32 nfloat = 0;
    int32 nfr;
    int32 nout;
    size_t nsamp;

    fe_start_utt(wtf->fe);

    for (;;) {
        int16 const *inspeech;
        size_t nvec;
        size_t idx;

        nsamp = fread(wtf->audio, 2, wtf->blocksize, wtf->infh);
        if (nsamp == 0)
            break;

        /* Byteswap the freshly read samples if the input requires it. */
        if (wtf->byteswap) {
            for (idx = 0; idx < nsamp; ++idx)
                SWAP_INT16(wtf->audio + idx);
        }

        /* Mix or pick channels; this yields the new sample count. */
        if (nchans > 1)
            nsamp = mixnpick_channels(wtf->audio, nsamp, nchans, whichchan);

        inspeech = wtf->audio;
        nvec = wtf->featsize;

        /* Keep calling the front end until the block is used up
         * (it updates inspeech and nsamp through the pointers). */
        while (nsamp) {
            nfr = nvec;
            fe_process_frames(wtf->fe, &inspeech, &nsamp, wtf->feat, &nfr, NULL);
            if (nfr == 0)
                continue;
            nout = wtf->ot->output_frames(wtf, wtf->feat, nfr);
            if (nout < 0)
                return -1;
            nfloat += nout;
        }
    }

    /* Flush whatever the front end still holds at end of utterance. */
    fe_end_utt(wtf->fe, wtf->feat[0], &nfr);
    if (nfr) {
        nout = wtf->ot->output_frames(wtf, wtf->feat, nfr);
        if (nout < 0)
            return -1;
        nfloat += nout;
    }

    if (fclose(wtf->infh) == EOF)
        E_ERROR_SYSTEM("Failed to close input file");
    wtf->infh = NULL;

    return nfloat;
}
/*
 * Read up to `max` 16-bit samples from the file-scope stream `infp`
 * into `buf`.  When WORDS_BIGENDIAN evaluates true, every sample read
 * is byte-swapped in place.  The `r` argument is not used.
 *
 * Returns the number of samples read, or -1 if none could be read.
 */
static int32
file_ad_read(ad_rec_t * r, int16 * buf, int32 max)
{
    int32 nread;
    int32 idx;

    nread = fread(buf, sizeof(int16), max, infp);

    if (WORDS_BIGENDIAN) {
        idx = 0;
        while (idx < nread) {
            SWAP_INT16(&buf[idx]);
            ++idx;
        }
    }

    if (nread > 0)
        return nread;
    return -1;
}
/**
 * Output HTK format header.
 *
 * Writes four fields to wtf->outfh: the 32-bit value `nfloat`, the
 * 32-bit sample period derived from "-frate", the 16-bit vector size
 * in bytes (wtf->veclen * 4) and the 16-bit parameter kind.  Each
 * field is byte-swapped first when "-mach_endian" is "little".
 *
 * @param wtf    converter state (config, output handle, vector length).
 * @param nfloat value written as the first header field.
 * @return 0 on success, -1 if any fwrite() fails.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;

    /* Same file size thing as in Sphinx files (I think) */
    if (swap)
        SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;

    /* Sample period in 100ns units. */
    samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (swap)
        SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;

    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap)
        SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;

    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK;      /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O;  /* MFCC + CEP0 (note reordering...) */
    if (swap)
        SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
/********************************************************************* FUNCTION: fe_process_frame PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, mfcc_t *fr_cep RETURNS: status, successful or not DESCRIPTION: processes the given speech data and returns features. Modified to process one frame of speech only. **********************************************************************/ int32 fe_process_frame(fe_t * FE, int16 * spch, int32 nsamps, mfcc_t * fr_cep) { int32 spbuf_len, i; frame_t *spbuf; int32 return_value = FE_SUCCESS; spbuf_len = FE->FRAME_SIZE; /* assert(spbuf_len <= nsamps); */ if ((spbuf = (frame_t *) calloc(spbuf_len, sizeof(frame_t))) == NULL) { E_FATAL("memory alloc failed in fe_process_frame()...exiting\n"); } /* Added byte-swapping for Endian-ness compatibility */ if (FE->swap) for (i = 0; i < nsamps; i++) SWAP_INT16(&spch[i]); /* Add dither, if need. Warning: this may add dither twice to the samples in overlapping frames. */ if (FE->dither) { fe_dither(spch, spbuf_len); } /* pre-emphasis if needed,convert from int16 to float64 */ if (FE->PRE_EMPHASIS_ALPHA != 0.0) { fe_pre_emphasis(spch, spbuf, spbuf_len, FE->PRE_EMPHASIS_ALPHA, FE->PRIOR); FE->PRIOR = spch[FE->FRAME_SHIFT - 1]; /* Z.A.B for frame by frame analysis */ } else { fe_short_to_frame(spch, spbuf, spbuf_len); } /* frame based processing - let's make some cepstra... */ fe_hamming_window(spbuf, FE->HAMMING_WINDOW, FE->FRAME_SIZE, FE->remove_dc); return_value = fe_frame_to_fea(FE, spbuf, fr_cep); free(spbuf); return return_value; }
// Loads the texture from the specified file and stores it in iTexture. Note // that we're using the GLAUX library here, which is generally discouraged, // but in this case spares us having to write a bitmap loading routine. GLuint LoadTexture(const char* const filename) { #pragma pack(1) struct TGAHEADER { GLbyte identsize; // Size of ID field that follows header (0) GLbyte colorMapType; // 0 = None, 1 = palette GLbyte imageType; // 0 = none, 1 = indexed, 2 = rgb, 3 = grey, +8=rle unsigned short colorMapStart; // First color map entry unsigned short colorMapLength; // Number of colors unsigned char colorMapBits; // bits per palette entry unsigned short xstart; // image x origin unsigned short ystart; // image y origin unsigned short width; // width in pixels unsigned short height; // height in pixels GLbyte bits; // bits per pixel (8 16, 24, 32) GLbyte descriptor; // image descriptor }; #pragma pack(8) char fullPathName[TERXTURE_PATH_NAME_SIZE ]; GetWorkingFileName (filename, fullPathName); TextureCache& cache = TextureCache::GetChache(); GLuint texture = cache.GetTexture(fullPathName); if (!texture) { FILE* const pFile = fopen (fullPathName, "rb"); if(pFile == NULL) { return 0; } //_ASSERTE (sizeof (TGAHEADER) == 18); // Read in header (binary) sizeof(TGAHEADER) = 18 TGAHEADER tgaHeader; // TGA file header fread(&tgaHeader, 18, 1, pFile); // Do byte swap for big vs little Indian tgaHeader.colorMapStart = SWAP_INT16(tgaHeader.colorMapStart); tgaHeader.colorMapLength = SWAP_INT16(tgaHeader.colorMapLength); tgaHeader.xstart = SWAP_INT16(tgaHeader.xstart); tgaHeader.ystart = SWAP_INT16(tgaHeader.ystart); tgaHeader.width = SWAP_INT16(tgaHeader.width); tgaHeader.height = SWAP_INT16(tgaHeader.height); // Get width, height, and depth of texture GLint iWidth = tgaHeader.width; GLint iHeight = tgaHeader.height; short sDepth = tgaHeader.bits / 8; _ASSERTE ((sDepth == 3) || (sDepth == 4)); // Put some validity checks here. 
Very simply, I only understand // or care about 8, 24, or 32 bit targa's. if(tgaHeader.bits != 8 && tgaHeader.bits != 24 && tgaHeader.bits != 32) { fclose(pFile); return 0; } // Calculate size of image buffer unsigned lImageSize = tgaHeader.width * tgaHeader.height * sDepth; // Allocate memory and check for success GLbyte* const pBits = new GLbyte [tgaHeader.width * tgaHeader.height * 4]; if(pBits == NULL) { fclose(pFile); return 0; } // Read in the bits // Check for read error. This should catch RLE or other // weird formats that I don't want to recognize if(fread(pBits, lImageSize, 1, pFile) != 1) { fclose(pFile); delete[] pBits; return 0; } GLenum eFormat = GL_RGBA; GLint iComponents = 4; switch(sDepth) { // intel arch case 3: // Most likely case //eFormat = GL_BGR_EXT; eFormat = GL_BGR; //iComponents = GL_RGB; iComponents = 4; break; case 4: eFormat = GL_BGRA; //eFormat = GL_BGRA_EXT; //iComponents = GL_RGBA; iComponents = 4; break; case 1: eFormat = GL_LUMINANCE; iComponents = 1; break; }; texture = 0; glGenTextures(1, &texture); if (texture) { //GLenum errr = glGetError (); glBindTexture(GL_TEXTURE_2D, texture); // select modulate to mix texture with color for shading glTexEnvf( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE ); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // glTexImage2D(GL_TEXTURE_2D, 0, iComponents, iWidth, iHeight, 0, eFormat, GL_UNSIGNED_BYTE, pBits); // when texture area is small, bilinear filter the closest mipmap // glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST ); // when texture area is small, trilinear filter mipmaped glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); // when texture area is large, bilinear filter the first mipmap glTexParameterf( 
GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR ); // build our texture mipmaps gluBuild2DMipmaps (GL_TEXTURE_2D, iComponents, iWidth, iHeight, eFormat, GL_UNSIGNED_BYTE, pBits); // Done with File fclose(pFile); delete[] pBits; cache.InsertText (fullPathName, texture); } } return texture; }
// Returns SWAP_INT16(x >> 16) added to SWAP_INT16(x) shifted left by 16 bits,
// i.e. the two 16-bit halves of x change places after each has been passed
// through SWAP_INT16.  SWAP_INT16 is defined outside this chunk; presumably it
// is a 16-bit byte swap, which would make this a 32-bit byte swap -- TODO confirm.
// NOTE(review): if SWAP_INT16 yields a type narrower than int, the left shift
// is done on a promoted signed int -- confirm it cannot overflow.
unsigned SWAP_INT32(unsigned x) { return SWAP_INT16 ( x >> 16) + (SWAP_INT16 (x) << 16); }
/*
 * Write state segmentation in Sphinx-II format.  (Must be written in BIG-ENDIAN
 * format!)
 *
 * The output file name is built by build_output_uttfile() from dir, uttid and
 * ctlspec, with ".v8_seg" appended.  The file holds a 32-bit frame count
 * followed by one 16-bit value per frame:
 *     ci[0] * mdef->n_emit_state + state, with bit 0x8000 set when
 *     stseg->start is non-zero.
 *
 * On the first call the host byte order is probed by writing BYTE_ORDER_MAGIC
 * to the output file and reading it back; the result is cached in a static
 * variable.  Errors are logged and the function returns without a status.
 */
static void write_s2stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec)
{
    char filename[1024];
    FILE *fp;
    align_stseg_t *tmp;
    int32 k;
    s3cipid_t ci[3];
    word_posn_t wpos;
    int16 s2_info;
    char buf[8];
    static int32 byterev = -1;	/* Whether to byte reverse output data */

    build_output_uttfile (filename, dir, uttid, ctlspec);
    strcat (filename, ".v8_seg");		/* .v8_seg for compatibility */
    E_INFO("Writing Sphinx-II format state segmentation to: %s\n", filename);
    if ((fp = fopen (filename, "wb")) == NULL) {
	E_ERROR("fopen(%s,wb) failed\n", filename);
	return;
    }

    if (byterev < 0) {
	/* Byte ordering of host machine unknown; first figure it out */
	k = (int32) BYTE_ORDER_MAGIC;
	if (fwrite (&k, sizeof(int32), 1, fp) != 1)
	    goto write_error;

	fclose (fp);
	if ((fp = fopen (filename, "rb")) == NULL) {
	    E_ERROR ("fopen(%s,rb) failed\n", filename);
	    return;
	}
	if (fread (buf, 1, sizeof(int32), fp) != sizeof(int32)) {
	    E_ERROR ("fread(%s) failed\n", filename);
	    fclose (fp);	/* do not leak the probe handle on failure */
	    return;
	}
	fclose (fp);

	/* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian.  Need to byterev */
	byterev = (buf[0] == (BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0;

	/* Reopen for writing, which discards the probe value. */
	if ((fp = fopen (filename, "wb")) == NULL) {
	    E_ERROR("fopen(%s,wb) failed\n", filename);
	    return;
	}
    }

    /* Write #frames */
    for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next);
    if (byterev)
	SWAP_INT32(&k);
    if (fwrite (&k, sizeof(int32), 1, fp) != 1)
	goto write_error;

    /* Write state info for each frame */
    for (; stseg; stseg = stseg->next) {
	mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos);

	s2_info = ci[0] * mdef->n_emit_state + stseg->state;
	if (stseg->start)
	    s2_info |= 0x8000;
	if (byterev)
	    SWAP_INT16(&s2_info);

	if (fwrite (&s2_info, sizeof(int16), 1, fp) != 1)
	    goto write_error;
    }

    fclose (fp);
    return;

write_error:
    E_ERROR("fwrite(%s) failed\n", filename);
    fclose (fp);
}
/*
 * Parse the header of a RIFF/WAVE stream and leave `infh` positioned at
 * the first byte of sample data.
 *
 * Sets "-input_endian" to "little", requires a 'WAVE' form whose first
 * chunk is 'fmt ', and accepts only PCM (format 1), one channel and 16
 * bits per sample.  If "-samprate" is 0 it is set from the file;
 * otherwise a mismatch only produces a warning.  Chunks between 'fmt '
 * and 'data' are skipped.
 *
 * TRY_FREAD is defined outside this block; presumably it jumps to
 * error_out on a short read -- confirm against its definition.
 *
 * Returns 0 on success, -1 on any format or I/O error.
 */
static int
read_riff_header(FILE *infh)
{
    char id[4];
    int32 intval, header_len;
    int16 shortval;

    /* RIFF files are little-endian by definition. */
    cmd_ln_set_str("-input_endian", "little");

    /* Read in all the header chunks and etcetera. */
    TRY_FREAD(id, 1, 4, infh);
    /* Total file length (we don't care) */
    TRY_FREAD(&intval, 4, 1, infh);
    /* 'WAVE' */
    TRY_FREAD(id, 1, 4, infh);
    if (0 != memcmp(id, "WAVE", 4)) {
        E_ERROR("This is not a WAVE file\n");
        goto error_out;
    }
    /* 'fmt ' */
    TRY_FREAD(id, 1, 4, infh);
    if (0 != memcmp(id, "fmt ", 4)) {
        E_ERROR("Format chunk missing\n");
        goto error_out;
    }
    /* Length of 'fmt ' chunk */
    TRY_FREAD(&intval, 4, 1, infh);
    SWAP_INT32(&intval);
    header_len = intval;

    /* Data format. */
    TRY_FREAD(&shortval, 2, 1, infh);
    SWAP_INT16(&shortval);
    if (shortval != 1) { /* PCM */
        E_ERROR("WAVE file is not in PCM format\n");
        goto error_out;
    }

    /* Number of channels. */
    TRY_FREAD(&shortval, 2, 1, infh);
    SWAP_INT16(&shortval);
    if (shortval != 1) { /* mono */
        E_ERROR("WAVE file is not single channel\n");
        goto error_out;
    }

    /* Sampling rate (finally!) */
    TRY_FREAD(&intval, 4, 1, infh);
    SWAP_INT32(&intval);
    if (cmd_ln_int32("-samprate") == 0)
        cmd_ln_set_int32("-samprate", intval);
    else if (cmd_ln_int32("-samprate") != intval) {
        E_WARN("WAVE file sampling rate %d != -samprate %d\n",
               intval, cmd_ln_int32("-samprate"));
    }

    /* Average bytes per second (we don't care) */
    TRY_FREAD(&intval, 4, 1, infh);

    /* Block alignment (we don't care) */
    TRY_FREAD(&shortval, 2, 1, infh);

    /* Bits per sample (must be 16) */
    TRY_FREAD(&shortval, 2, 1, infh);
    SWAP_INT16(&shortval);
    if (shortval != 16) {
        E_ERROR("WAVE file is not 16-bit\n");
        goto error_out;
    }

    /* Any extra parameters: 16 bytes of 'fmt ' were consumed above. */
    if (header_len > 16) {
        if (fseek(infh, header_len - 16, SEEK_CUR) != 0) {
            E_ERROR("Failed to skip extra format parameters\n");
            goto error_out;
        }
    }

    /* Now skip to the 'data' chunk. */
    while (1) {
        TRY_FREAD(id, 1, 4, infh);
        if (0 == memcmp(id, "data", 4)) {
            /* Total number of bytes of data (we don't care). */
            TRY_FREAD(&intval, 4, 1, infh);
            break;
        }
        else {
            /* Some other stuff... */
            /* Number of bytes of ... whatever */
            TRY_FREAD(&intval, 4, 1, infh);
            SWAP_INT32(&intval);
            /* A negative length would seek backwards and could loop
             * forever on a malformed file; a failed seek would leave
             * us parsing chunk payload as headers. */
            if (intval < 0 || fseek(infh, intval, SEEK_CUR) != 0) {
                E_ERROR("Failed to skip chunk of length %d\n", intval);
                goto error_out;
            }
        }
    }

    /* We are ready to rumble. */
    return 0;

error_out:
    return -1;
}
/*
 * Read a model definition file.
 *
 * The file is first tried as text via bin_mdef_read_text().  If that
 * fails it is read as binary: a byte-order marker, a format version, a
 * format descriptor (skipped), ten 32-bit counts, and then one data
 * region holding the CI phone names, the CD tree, the phone table and
 * the senone sequences.  That region is either memory-mapped or read
 * into a single allocation; the structure's pointers refer into it.
 * When the byte-order marker equals BIN_MDEF_OTHER_ENDIAN all values
 * are byte-swapped in place and memory mapping is disabled.
 *
 * @param config   configuration consulted for "-mmap"; when NULL,
 *                 memory mapping is attempted by default.
 * @param filename path of the model definition file.
 * @return the new model definition, or NULL if the file cannot be
 *         opened, a header field cannot be read, or the file's format
 *         version is newer than BIN_MDEF_FORMAT_VERSION.
 */
bin_mdef_t *
bin_mdef_read(cmd_ln_t *config, const char *filename)
{
    bin_mdef_t *m;
    FILE *fh;
    size_t tree_start;
    int32 val, i, swap, pos, end;
    int32 *sseq_size;
    int do_mmap;

    /* Try to read it as text first. */
    if ((m = bin_mdef_read_text(config, filename)) != NULL)
        return m;

    E_INFO("Reading binary model definition: %s\n", filename);
    if ((fh = fopen(filename, "rb")) == NULL)
        return NULL;

    /* Failures are reported before fclose() so that cleanup cannot
     * disturb errno before the message is produced. */
    if (fread(&val, 4, 1, fh) != 1) {
        E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
                       filename);
        fclose(fh);
        return NULL;
    }
    swap = 0;
    if (val == BIN_MDEF_OTHER_ENDIAN) {
        swap = 1;
        E_INFO("Must byte-swap %s\n", filename);
    }
    if (fread(&val, 4, 1, fh) != 1) {
        E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
        fclose(fh);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    if (val > BIN_MDEF_FORMAT_VERSION) {
        E_ERROR("File format version %d for %s is newer than library\n",
                val, filename);
        fclose(fh);
        return NULL;
    }
    if (fread(&val, 4, 1, fh) != 1) {
        E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
        fclose(fh);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    /* Skip format descriptor. */
    fseek(fh, val, SEEK_CUR);

    /* Finally allocate it. */
    m = ckd_calloc(1, sizeof(*m));
    m->refcnt = 1;

    /* Check these, to make gcc/glibc shut up. */
#define FREAD_SWAP32_CHK(dest)                                          \
    if (fread((dest), 4, 1, fh) != 1) {                                 \
        E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
        fclose(fh);                                                     \
        ckd_free(m);                                                    \
        return NULL;                                                    \
    }                                                                   \
    if (swap) SWAP_INT32(dest);

    FREAD_SWAP32_CHK(&m->n_ciphone);
    FREAD_SWAP32_CHK(&m->n_phone);
    FREAD_SWAP32_CHK(&m->n_emit_state);
    FREAD_SWAP32_CHK(&m->n_ci_sen);
    FREAD_SWAP32_CHK(&m->n_sen);
    FREAD_SWAP32_CHK(&m->n_tmat);
    FREAD_SWAP32_CHK(&m->n_sseq);
    FREAD_SWAP32_CHK(&m->n_ctx);
    FREAD_SWAP32_CHK(&m->n_cd_tree);
    FREAD_SWAP32_CHK(&m->sil);

    /* CI names are first in the file. */
    m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));

    /* Decide whether to read in the whole file or mmap it. */
    do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
    if (swap) {
        /* Swapping happens in place, so the data cannot be mapped.
         * Only warn when memory mapping would otherwise be used. */
        if (do_mmap)
            E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n");
        do_mmap = FALSE;
    }
    /* Actually try to mmap it. */
    if (do_mmap) {
        m->filemap = mmio_file_read(filename);
        if (m->filemap == NULL)
            do_mmap = FALSE;
    }
    pos = ftell(fh);
    if (do_mmap) {
        /* Get the base pointer from the memory map. */
        m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
        /* Success! */
        m->alloc_mode = BIN_MDEF_ON_DISK;
    }
    else {
        /* Read everything into memory. */
        m->alloc_mode = BIN_MDEF_IN_MEMORY;
        fseek(fh, 0, SEEK_END);
        end = ftell(fh);
        fseek(fh, pos, SEEK_SET);
        m->ciname[0] = ckd_malloc(end - pos);
        if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
            E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename);
    }

    /* The names are consecutive NUL-terminated strings. */
    for (i = 1; i < m->n_ciphone; ++i)
        m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;

    /* Skip past the padding: the tree starts on a 4-byte boundary. */
    tree_start =
        m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
    tree_start = (tree_start + 3) & ~3;
    m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
    if (swap) {
        for (i = 0; i < m->n_cd_tree; ++i) {
            SWAP_INT16(&m->cd_tree[i].ctx);
            SWAP_INT16(&m->cd_tree[i].n_down);
            SWAP_INT32(&m->cd_tree[i].c.down);
        }
    }
    /* The phone table follows the tree directly. */
    m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
    if (swap) {
        for (i = 0; i < m->n_phone; ++i) {
            SWAP_INT32(&m->phone[i].ssid);
            SWAP_INT32(&m->phone[i].tmat);
        }
    }
    /* Then the senone-sequence size, followed by the sequences. */
    sseq_size = (int32 *) (m->phone + m->n_phone);
    if (swap)
        SWAP_INT32(sseq_size);
    m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
    m->sseq[0] = (uint16 *) (sseq_size + 1);
    if (swap) {
        for (i = 0; i < *sseq_size; ++i)
            SWAP_INT16(m->sseq[0] + i);
    }
    if (m->n_emit_state) {
        /* Fixed number of states per sequence. */
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
    }
    else {
        /* Variable lengths, stored after the sequence data. */
        m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
    }

    /* Now build the CD-to-CI mappings using the senone sequences.
     * This is the only really accurate way to do it, though it is
     * still inaccurate in the case of heterogeneous topologies or
     * cross-state tying. */
    m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
    m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));

    /* Default mappings (identity, none) */
    for (i = 0; i < m->n_ci_sen; ++i)
        m->cd2cisen[i] = i;
    for (; i < m->n_sen; ++i)
        m->cd2cisen[i] = -1;
    for (i = 0; i < m->n_sen; ++i)
        m->sen2cimap[i] = -1;
    for (i = 0; i < m->n_phone; ++i) {
        int32 j, ssid = m->phone[i].ssid;

        for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
            int s = bin_mdef_sseq2sen(m, ssid, j);
            int ci = bin_mdef_pid2ci(m, i);
            /* Take the first one and warn if we have cross-state tying. */
            if (m->sen2cimap[s] == -1)
                m->sen2cimap[s] = ci;
            if (m->sen2cimap[s] != ci)
                E_WARN
                    ("Senone %d is shared between multiple base phones\n",
                     s);

            if (j > bin_mdef_n_emit_state_phone(m, ci))
                E_WARN("CD phone %d has fewer states than CI phone %d\n",
                       i, ci);
            else
                m->cd2cisen[s] =
                    bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
        }
    }

    /* Set the silence phone. */
    m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE);

    E_INFO
        ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
         m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
         m->n_ci_sen, m->n_sen, m->n_sseq);
    fclose(fh);

    return m;
}
/********************************************************************* FUNCTION: fe_process_utt PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, mfcc_t **cep, int32 nframes RETURNS: status, successful or not DESCRIPTION: processes the given speech data and returns features. will prepend overflow data from last call and store new overflow data within the FE **********************************************************************/ int32 fe_process_utt(fe_t * FE, int16 * spch, int32 nsamps, mfcc_t *** cep_block, int32 * nframes) { int32 frame_start, frame_count = 0, whichframe = 0; int32 i, spbuf_len, offset = 0; frame_t *spbuf, *fr_data; int16 *tmp_spch = spch; mfcc_t **cep = NULL; int32 return_value = FE_SUCCESS; int32 frame_return_value; /* Added byte-swapping for Endian-ness compatibility */ if (FE->swap) for (i = 0; i < nsamps; i++) SWAP_INT16(&spch[i]); /* are there enough samples to make at least 1 frame? */ if (nsamps + FE->NUM_OVERFLOW_SAMPS >= FE->FRAME_SIZE) { /* if there are previous samples, pre-pend them to input speech samps */ if ((FE->NUM_OVERFLOW_SAMPS > 0)) { if ((tmp_spch = (int16 *) malloc(sizeof(int16) * (FE->NUM_OVERFLOW_SAMPS + nsamps))) == NULL) { E_WARN("memory alloc failed in fe_process_utt()\n"); return FE_MEM_ALLOC_ERROR; } /* RAH */ memcpy(tmp_spch, FE->OVERFLOW_SAMPS, FE->NUM_OVERFLOW_SAMPS * (sizeof(int16))); /* RAH */ memcpy(tmp_spch + FE->NUM_OVERFLOW_SAMPS, spch, nsamps * (sizeof(int16))); /* RAH */ nsamps += FE->NUM_OVERFLOW_SAMPS; FE->NUM_OVERFLOW_SAMPS = 0; /*reset overflow samps count */ } /* compute how many complete frames can be processed and which samples correspond to those samps */ frame_count = 0; for (frame_start = 0; frame_start + FE->FRAME_SIZE <= nsamps; frame_start += FE->FRAME_SHIFT) frame_count++; if ((cep = (mfcc_t **) fe_create_2d(frame_count + 1, FE->FEATURE_DIMENSION, sizeof(mfcc_t))) == NULL) { E_WARN ("memory alloc for cep failed in fe_process_utt()\n\tfe_create_2d(%ld,%d,%d)\n", (long int) (frame_count + 1), 
FE->FEATURE_DIMENSION, sizeof(mfcc_t)); return (FE_MEM_ALLOC_ERROR); } spbuf_len = (frame_count - 1) * FE->FRAME_SHIFT + FE->FRAME_SIZE; if ((spbuf = (frame_t *) calloc(spbuf_len, sizeof(frame_t))) == NULL) { E_WARN("memory alloc failed in fe_process_utt()\n"); return (FE_MEM_ALLOC_ERROR); } /* Add dither, if requested */ if (FE->dither) { fe_dither(tmp_spch, spbuf_len); } /* pre-emphasis if needed, convert from int16 to float64 */ if (FE->PRE_EMPHASIS_ALPHA != 0.0) { fe_pre_emphasis(tmp_spch, spbuf, spbuf_len, FE->PRE_EMPHASIS_ALPHA, FE->PRIOR); } else { fe_short_to_frame(tmp_spch, spbuf, spbuf_len); } /* frame based processing - let's make some cepstra... */ fr_data = (frame_t *) calloc(FE->FRAME_SIZE, sizeof(frame_t)); if (fr_data == NULL) { E_WARN("memory alloc failed in fe_process_utt()\n"); return (FE_MEM_ALLOC_ERROR); } for (whichframe = 0; whichframe < frame_count; whichframe++) { for (i = 0; i < FE->FRAME_SIZE; i++) fr_data[i] = spbuf[whichframe * FE->FRAME_SHIFT + i]; fe_hamming_window(fr_data, FE->HAMMING_WINDOW, FE->FRAME_SIZE, FE->remove_dc); frame_return_value = fe_frame_to_fea(FE, fr_data, cep[whichframe]); if (FE_SUCCESS != frame_return_value) { return_value = frame_return_value; } } /* done making cepstra */ /* assign samples which don't fill an entire frame to FE overflow buffer for use on next pass */ if ((offset = ((frame_count) * FE->FRAME_SHIFT)) < nsamps) { memcpy(FE->OVERFLOW_SAMPS, tmp_spch + offset, (nsamps - offset) * sizeof(int16)); FE->NUM_OVERFLOW_SAMPS = nsamps - offset; FE->PRIOR = tmp_spch[offset - 1]; assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE); } if (spch != tmp_spch) free(tmp_spch); free(spbuf); free(fr_data); } /* if not enough total samps for a single frame, append new samps to previously stored overlap samples */ else { memcpy(FE->OVERFLOW_SAMPS + FE->NUM_OVERFLOW_SAMPS, tmp_spch, nsamps * (sizeof(int16))); FE->NUM_OVERFLOW_SAMPS += nsamps; assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE); frame_count = 0; } *cep_block = 
cep; /* MLS */ *nframes = frame_count; return return_value; }
/*
 * Feed a block of raw audio samples to the live decoder.
 *
 * The samples are byte-swapped in place when _decoder->swap is set,
 * converted to cepstral frames with fe_process_utt(), turned into
 * feature vectors with feat_s2mfc2feat_block() and decoded with
 * utt_decode_block().  The front end is started with fe_start_utt()
 * when _decoder->num_frames_entered is 0, and flushed with
 * fe_end_utt() when end_utt is non-zero.
 *
 * NOTE(review): when end_utt is set, num_frames is overwritten by
 * fe_end_utt(), so frames produced by fe_process_utt() in the same
 * call do not appear to be decoded -- confirm whether callers always
 * pass end_utt with an empty block.
 */
void
ld_process_raw_impl(live_decoder_t * _decoder,
                    int16 * samples, int32 num_samples, int32 end_utt)
{
    float32 dummy_frame[MAX_CEP_LEN];
    float32 **frames = 0;
    int32 num_frames = 0;
    int32 num_features = 0;
    int32 begin_utt;
    int32 return_value;
    int i;

    /* Validate the decoder before the first dereference. */
    assert(_decoder != NULL);
    begin_utt = _decoder->num_frames_entered == 0;

    if (begin_utt) {
        fe_start_utt(_decoder->fe);
    }

    if (_decoder->swap) {
        for (i = 0; i < num_samples; i++) {
            SWAP_INT16(samples + i);
        }
    }

    return_value = fe_process_utt(_decoder->fe, samples, num_samples,
                                  &frames, &num_frames);

    if (end_utt) {
        return_value = fe_end_utt(_decoder->fe, dummy_frame, &num_frames);
        if (num_frames != 0) {
            /* ARCHAN: If num_frames !=0, assign this last ending frame to
               frames again.  The computation will then be correct.  Should
               clean up the finite state logic in fe_interface layer. */
            /* Release the block from fe_process_utt() first so that it
               is not leaked when frames is replaced. */
            if (frames != NULL) {
                ckd_free_2d((void **) frames);
            }
            frames = (float32 **) ckd_calloc_2d(1,
                                                _decoder->fe->NUM_CEPSTRA,
                                                sizeof(float32));
            memcpy(frames[0], dummy_frame,
                   _decoder->fe->NUM_CEPSTRA * sizeof(float32));
        }
    }

    if (FE_ZERO_ENERGY_ERROR == return_value) {
        E_WARN("Zero energy frame(s). Consider using dither\n");
    }

    if (num_frames > 0) {
        num_features = feat_s2mfc2feat_block(kbcore_fcb(_decoder->kbcore),
                                             frames,
                                             num_frames,
                                             begin_utt,
                                             end_utt, _decoder->features);
        _decoder->num_frames_entered += num_frames;
    }

    if (num_features > 0) {
        utt_decode_block(_decoder->features,
                         num_features,
                         &_decoder->num_frames_decoded, &_decoder->kb);
    }

    if (frames != NULL) {
        ckd_free_2d((void **) frames);
    }
}
// Loads the texture from the specified file and stores it in iTexture. Note // that we're using the GLAUX library here, which is generally discouraged, // but in this case spares us having to write a bitmap loading routine. GLuint LoadTexture(const char* const filename) { #pragma pack(1) struct TGAHEADER { char identsize; // Size of ID field that follows header (0) char colorMapType; // 0 = None, 1 = palette char imageType; // 0 = none, 1 = indexed, 2 = rgb, 3 = grey, +8=rle unsigned short colorMapStart; // First color map entry unsigned short colorMapLength; // Number of colors unsigned char colorMapBits; // bits per palette entry unsigned short xstart; // image x origin unsigned short ystart; // image y origin unsigned short width; // width in pixels unsigned short height; // height in pixels char bits; // bits per pixel (8 16, 24, 32) char descriptor; // image descriptor }; #pragma pack(8) char fullPathName[2048]; dGetWorkingFileName (filename, fullPathName); TextureCache& cache = TextureCache::GetChache(); GLuint texture = cache.GetTexture(fullPathName); if (!texture) { FILE* const pFile = fopen (fullPathName, "rb"); if(pFile == NULL) { return 0; } //dAssert (sizeof (TGAHEADER) == 18); // Read in header (binary) sizeof(TGAHEADER) = 18 TGAHEADER tgaHeader; // TGA file header fread(&tgaHeader, 18, 1, pFile); // Do byte swap for big vs little Indian tgaHeader.colorMapStart = SWAP_INT16(tgaHeader.colorMapStart); tgaHeader.colorMapLength = SWAP_INT16(tgaHeader.colorMapLength); tgaHeader.xstart = SWAP_INT16(tgaHeader.xstart); tgaHeader.ystart = SWAP_INT16(tgaHeader.ystart); tgaHeader.width = SWAP_INT16(tgaHeader.width); tgaHeader.height = SWAP_INT16(tgaHeader.height); // Get width, height, and depth of texture int width = tgaHeader.width; int height = tgaHeader.height; short sDepth = tgaHeader.bits / 8; dAssert ((sDepth == 3) || (sDepth == 4)); // Put some validity checks here. Very simply, I only understand // or care about 8, 24, or 32 bit targa's. 
if(tgaHeader.bits != 8 && tgaHeader.bits != 24 && tgaHeader.bits != 32) { fclose(pFile); return 0; } // Calculate size of image buffer unsigned lImageSize = width * height * sDepth; // Allocate memory and check for success char* const pBits = new char [width * height * 4]; if(pBits == NULL) { fclose(pFile); return 0; } // Read in the bits // Check for read error. This should catch RLE or other // weird formats that I don't want to recognize if(fread(pBits, lImageSize, 1, pFile) != 1) { fclose(pFile); delete[] pBits; return 0; } TextureImageFormat format = m_rgb; switch(sDepth) { case 1: format = m_luminace; break; case 3: format = m_rgb; break; case 4: format = m_rgba; break; }; texture = LoadImage(fullPathName, pBits, tgaHeader.width, tgaHeader.height, format); // Done with File fclose(pFile); delete[] pBits; } return texture; }
/*
 * Backtrace through the search lattice and write the resulting state
 * segmentation to `filename`.
 *
 * The file holds n_obs (truncated to 16 bits) followed by one 16-bit
 * value per frame taken from state_seq[].mixw; every value is passed
 * through SWAP_INT16() before being written.
 *
 * The backtrace starts from the entry of active_astate[n_obs-1] that
 * equals n_state-1 and follows the back-pointers in `bp`, stepping
 * over states whose mixw is TYING_NON_EMITTING.
 *
 * Returns S3_SUCCESS, or S3_ERROR when there are no observations, the
 * final state was not reached, or the file cannot be opened, written
 * or closed.
 */
int32
write_s2stseg(const char *filename,
              state_t *state_seq,
              uint32 **active_astate,
              uint32 *n_active_astate,
              uint32 n_state,
              uint32 n_obs,
              uint32 **bp)
{
    FILE *fh;
    uint32 q;
    int32 t;
    uint16 word, *stseg;
    int32 status = S3_SUCCESS;

    /* Without observations the [n_obs-1] indexing below would be out
     * of bounds. */
    if (n_obs == 0) {
        E_ERROR("No observations to segment\n");
        return S3_ERROR;
    }

    /* Backtrace and build a phone segmentation. */
    /* Find the non-emitting ending state */
    for (q = 0; q < n_active_astate[n_obs-1]; ++q) {
        if (active_astate[n_obs-1][q] == n_state-1)
            break;
    }
    if (q == n_active_astate[n_obs-1]) {
        E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n");
        return S3_ERROR;
    }

    if ((fh = fopen(filename, "wb")) == NULL) {
        return S3_ERROR;
    }

    /* Frame count header. */
    word = n_obs;
    SWAP_INT16(&word);
    if (fwrite(&word, 2, 1, fh) != 1) {
        E_ERROR("Failed to write frame count to %s\n", filename);
        fclose(fh);
        return S3_ERROR;
    }

    stseg = ckd_calloc(n_obs, sizeof(uint16));
    for (t = n_obs-1; t >= 0; --t) {
        uint32 j;

        j = active_astate[t][q];
        /* Follow any non-emitting states at time t first. */
        while (state_seq[j].mixw == TYING_NON_EMITTING) {
            j = active_astate[t][bp[t][q]];
            q = bp[t][q];
        }

        /* mixw = senone (we hope!) */
        stseg[t] = state_seq[j].mixw;
        SWAP_INT16(&stseg[t]);

        /* Backtrace. */
        if (t > 0) {
            q = bp[t][q];
        }
    }

    if (fwrite(stseg, 2, n_obs, fh) != n_obs) {
        E_ERROR("Failed to write state segmentation to %s\n", filename);
        status = S3_ERROR;
    }
    ckd_free(stseg);

    /* fclose() flushes buffered data, so its result matters too. */
    if (fclose(fh) != 0 && status == S3_SUCCESS) {
        E_ERROR("Failed to close %s\n", filename);
        status = S3_ERROR;
    }

    return status;
}