コード例 #1
0
ファイル: sphinx_fe.c プロジェクト: kirpen/pocketsphinx.js
/**
 * Process PCM audio from a filehandle.  Assume that wtf->infh is
 * positioned just after the file header.
 *
 * Audio is read in blocks of wtf->blocksize 16-bit samples, optionally
 * byte-swapped and channel-mixed, then pushed through the front end;
 * every batch of frames produced is handed to the active output
 * callback.  On success the input file is closed and wtf->infh is
 * reset to NULL.
 *
 * @param wtf Converter state (config, front end, buffers, file handles).
 * @return Sum of the values returned by the output callback, or -1 if
 *         the front end or the output callback reports a failure (the
 *         input file is left open in that case).
 */
static int
decode_pcm(sphinx_wave2feat_t *wtf)
{
    size_t nsamp;
    int32 n, nfr, nchans, whichchan;
    uint32 nfloat;

    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    whichchan = cmd_ln_int32_r(wtf->config, "-whichchan");
    fe_start_utt(wtf->fe);
    nfloat = 0;
    while ((nsamp = fread(wtf->audio, 2, wtf->blocksize, wtf->infh)) != 0) {
        size_t nvec;
        size_t i;
        int16 const *inspeech;

        /* Byteswap stuff here if necessary. */
        if (wtf->byteswap) {
            /* Index with size_t so the comparison against nsamp is not
             * a signed/unsigned mix. */
            for (i = 0; i < nsamp; ++i)
                SWAP_INT16(wtf->audio + i);
        }

        /* Mix or pick channels. */
        if (nchans > 1)
            nsamp = mixnpick_channels(wtf->audio, nsamp, nchans, whichchan);

        inspeech = wtf->audio;
        nvec = wtf->featsize;
        /* Consume all samples. */
        while (nsamp) {
            nfr = nvec;
            /* A failing front end may leave nsamp untouched; bail out
             * instead of looping forever on the same samples. */
            if (fe_process_frames(wtf->fe, &inspeech, &nsamp,
                                  wtf->feat, &nfr, NULL) < 0)
                return -1;
            if (nfr) {
                if ((n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr)) < 0)
                    return -1;
                nfloat += n;
            }
        }
    }
    /* Now process any leftover audio frames. */
    fe_end_utt(wtf->fe, wtf->feat[0], &nfr);
    if (nfr) {
        if ((n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr)) < 0)
            return -1;
        nfloat += n;
    }

    if (fclose(wtf->infh) == EOF)
        E_ERROR_SYSTEM("Failed to close input file");
    wtf->infh = NULL;
    return nfloat;
}
コード例 #2
0
ファイル: test_ad_read.c プロジェクト: 006/ios_lab
/*
 * Read up to `max` 16-bit samples from the file-scope stream `infp`
 * into `buf`, byte-swapping each sample in place when WORDS_BIGENDIAN
 * is non-zero.  The `r` argument is not used.
 *
 * Returns the number of samples read, or -1 when nothing was read.
 */
static int32
file_ad_read(ad_rec_t * r, int16 * buf, int32 max)
{
    int32 nread;
    int32 s;

    nread = fread(buf, sizeof(int16), max, infp);

    if (WORDS_BIGENDIAN) {
        s = 0;
        while (s < nread) {
            SWAP_INT16(&buf[s]);
            s++;
        }
    }

    if (nread > 0)
        return nread;
    return -1;
}
コード例 #3
0
ファイル: sphinx_fe.c プロジェクト: kirpen/pocketsphinx.js
/**
 * Output HTK format header.
 *
 * Writes four fields to wtf->outfh, in order: the 32-bit value passed
 * in as nfloat, the 32-bit sample period, the 16-bit sample size and
 * the 16-bit parameter kind.  Each field is byte-swapped first when
 * "-mach_endian" is "little".
 *
 * @return 0 on success, -1 as soon as any fwrite() fails.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 period;
    int16 vec_bytes;
    int16 kind;
    int need_swap;

    /* HTK files are big-endian. */
    need_swap =
        (strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")) == 0)
        ? TRUE : FALSE;

    /* Same file size thing as in Sphinx files (I think) */
    if (need_swap)
        SWAP_INT32(&nfloat);
    if (1 != fwrite(&nfloat, 4, 1, wtf->outfh))
        return -1;

    /* Sample period, expressed in units of 100ns. */
    period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (need_swap)
        SWAP_INT32(&period);
    if (1 != fwrite(&period, 4, 1, wtf->outfh))
        return -1;

    /* Bytes per vector: veclen values of 4 bytes each. */
    vec_bytes = wtf->veclen * 4;
    if (need_swap)
        SWAP_INT16(&vec_bytes);
    if (1 != fwrite(&vec_bytes, 2, 1, wtf->outfh))
        return -1;

    /* Format and flags: filter-bank output when either spectral option
     * is set, otherwise MFCC + CEP0 (note reordering...). */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
        kind = FBANK;
    }
    else {
        kind = MFCC | _O;
    }
    if (need_swap)
        SWAP_INT16(&kind);
    if (1 != fwrite(&kind, 2, 1, wtf->outfh))
        return -1;

    return 0;
}
コード例 #4
0
/*********************************************************************
   FUNCTION: fe_process_frame
   PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, mfcc_t *fr_cep
   RETURNS: the status returned by fe_frame_to_fea()
   DESCRIPTION: converts exactly one frame (FE->FRAME_SIZE samples) of
   speech into a feature vector written to fr_cep.  The input buffer
   is modified in place when byte-swapping or dither is enabled.
**********************************************************************/
int32
fe_process_frame(fe_t * FE, int16 * spch, int32 nsamps, mfcc_t * fr_cep)
{
    frame_t *frame_buf;
    int32 frame_len;
    int32 status;
    int32 s;

    frame_len = FE->FRAME_SIZE;

    /* assert(frame_len <= nsamps); */
    frame_buf = (frame_t *) calloc(frame_len, sizeof(frame_t));
    if (frame_buf == NULL) {
        E_FATAL("memory alloc failed in fe_process_frame()...exiting\n");
    }

    /* Byte-swap the incoming samples for endianness compatibility. */
    if (FE->swap) {
        for (s = 0; s < nsamps; s++)
            SWAP_INT16(&spch[s]);
    }

    /* Optional dither.  Warning: samples shared by overlapping frames
       may receive dither twice. */
    if (FE->dither) {
        fe_dither(spch, frame_len);
    }

    /* Convert int16 samples to frame_t, with pre-emphasis if enabled. */
    if (FE->PRE_EMPHASIS_ALPHA == 0.0) {
        fe_short_to_frame(spch, frame_buf, frame_len);
    }
    else {
        fe_pre_emphasis(spch, frame_buf, frame_len,
                        FE->PRE_EMPHASIS_ALPHA, FE->PRIOR);
        /* Remember the last sample before the next frame's start so
           frame-by-frame analysis stays continuous. */
        FE->PRIOR = spch[FE->FRAME_SHIFT - 1];
    }

    /* Window the frame and turn it into cepstra. */
    fe_hamming_window(frame_buf, FE->HAMMING_WINDOW, FE->FRAME_SIZE, FE->remove_dc);
    status = fe_frame_to_fea(FE, frame_buf, fr_cep);
    free(frame_buf);

    return status;
}
コード例 #5
0
//	Loads an uncompressed TGA image from the given file, uploads it as a
//	mipmapped GL_TEXTURE_2D texture and registers it in the texture cache.
//	Returns the cached texture name when the file was loaded before, or 0
//	when the file cannot be opened, is truncated, or is not 8/24/32 bit.
GLuint LoadTexture(const char* const filename)
{
	#pragma pack(1)
	struct TGAHEADER
	{
		GLbyte	identsize;              // Size of ID field that follows header (0)
		GLbyte	colorMapType;           // 0 = None, 1 = palette
		GLbyte	imageType;              // 0 = none, 1 = indexed, 2 = rgb, 3 = grey, +8=rle
		unsigned short	colorMapStart;  // First color map entry
		unsigned short	colorMapLength; // Number of colors
		unsigned char 	colorMapBits;   // bits per palette entry
		unsigned short	xstart;         // image x origin
		unsigned short	ystart;         // image y origin
		unsigned short	width;          // width in pixels
		unsigned short	height;         // height in pixels
		GLbyte	bits;                   // bits per pixel (8 16, 24, 32)
		GLbyte	descriptor;             // image descriptor
	};
	#pragma pack(8)


	char fullPathName[TERXTURE_PATH_NAME_SIZE ];
	GetWorkingFileName (filename, fullPathName);
	TextureCache& cache = TextureCache::GetChache();
	GLuint texture = cache.GetTexture(fullPathName);
	if (!texture) {

		FILE* const pFile = fopen (fullPathName, "rb");
		if(pFile == NULL) {
			return 0;
		}

		//_ASSERTE (sizeof (TGAHEADER) == 18);
		// Read in header (binary) sizeof(TGAHEADER) = 18.
		// A short read would leave the header uninitialised, so reject it.
		TGAHEADER tgaHeader;		// TGA file header
		if(fread(&tgaHeader, 18, 1, pFile) != 1) {
			fclose(pFile);
			return 0;
		}

		// Do byte swap for big vs little endian
		tgaHeader.colorMapStart = SWAP_INT16(tgaHeader.colorMapStart);
		tgaHeader.colorMapLength = SWAP_INT16(tgaHeader.colorMapLength);
		tgaHeader.xstart = SWAP_INT16(tgaHeader.xstart);
		tgaHeader.ystart = SWAP_INT16(tgaHeader.ystart);
		tgaHeader.width = SWAP_INT16(tgaHeader.width);
		tgaHeader.height = SWAP_INT16(tgaHeader.height);

		// Get width, height, and depth of texture
		GLint iWidth = tgaHeader.width;
		GLint iHeight = tgaHeader.height;
		short sDepth = tgaHeader.bits / 8;
		// NOTE(review): this assertion rejects 8-bit images in debug
		// builds even though the check below accepts them.
		_ASSERTE ((sDepth == 3) || (sDepth == 4));

		// Put some validity checks here. Very simply, I only understand
		// or care about 8, 24, or 32 bit targa's.
		if(tgaHeader.bits != 8 && tgaHeader.bits != 24 && tgaHeader.bits != 32) {
			fclose(pFile);
			return 0;
		}

		// Calculate size of image buffer
		unsigned lImageSize = tgaHeader.width * tgaHeader.height * sDepth;

		// Allocate memory and check for success
		GLbyte* const pBits = new GLbyte [tgaHeader.width * tgaHeader.height * 4];
		if(pBits == NULL) {
			fclose(pFile);
			return 0;
		}

		// Read in the bits
		// Check for read error. This should catch RLE or other
		// weird formats that I don't want to recognize
		if(fread(pBits, lImageSize, 1, pFile) != 1)  {
			fclose(pFile);
			delete[] pBits;
			return 0;
		}

		// The pixel data is in memory now; the file is no longer needed.
		// Closing it here guarantees it is released on every later path.
		fclose(pFile);

		GLenum eFormat = GL_RGBA;
		GLint iComponents = 4;
		switch(sDepth)
		{
			// intel arch
			case 3:     // Most likely case
				eFormat = GL_BGR;
				iComponents = 4;
				break;

			case 4:
				eFormat = GL_BGRA;
				iComponents = 4;
				break;

			case 1:
				eFormat = GL_LUMINANCE;
				iComponents = 1;
				break;
		};

		texture = 0;
		glGenTextures(1, &texture);
		if (texture) {
			glBindTexture(GL_TEXTURE_2D, texture);

			// select modulate to mix texture with color for shading
			glTexEnvf( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE );

			glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
			glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

			// when texture area is small, trilinear filter mipmaped
			glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);

			// when texture area is large, bilinear filter the first mipmap
			glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );

			// build our texture mipmaps
			gluBuild2DMipmaps (GL_TEXTURE_2D, iComponents, iWidth, iHeight, eFormat, GL_UNSIGNED_BYTE, pBits);

			cache.InsertText (fullPathName, texture);
		}

		// Release the pixel buffer whether or not a texture name was
		// obtained; previously it leaked when glGenTextures gave 0.
		delete[] pBits;
	}
	return texture;
}
コード例 #6
0
	// Reverses the byte order of a 32-bit value by running each 16-bit
	// half through SWAP_INT16 (defined elsewhere) and exchanging the halves.
	unsigned SWAP_INT32(unsigned x)
	{
		// Widen both halves to unsigned before shifting: a 16-bit result
		// would otherwise be promoted to int, and shifting a value with
		// its top bit set left by 16 would run into the sign bit.
		const unsigned swappedHigh = SWAP_INT16 (x >> 16);
		const unsigned swappedLow = SWAP_INT16 (x);
		return swappedHigh + (swappedLow << 16);
	}
コード例 #7
0
/*
 * Write state segmentation in Sphinx-II format.  (Must be written in BIG-ENDIAN
 * format!)
 *
 * The output path is built from dir/uttid/ctlspec with ".v8_seg" appended.
 * The file holds a 32-bit frame count followed by one 16-bit word per frame:
 * ci-phone-id * n_emit_state + state, with bit 0x8000 set on frames whose
 * `start` flag is set.
 *
 * On the first call the host byte order is probed by writing
 * BYTE_ORDER_MAGIC to the output file and reading its first byte back; the
 * result is cached in a function-local static for later calls.
 *
 * Failures are logged through E_ERROR and the function simply returns.
 */
static void write_s2stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec)
{
    char filename[1024];
    FILE *fp;
    align_stseg_t *tmp;
    int32 k;
    s3cipid_t ci[3];
    word_posn_t wpos;
    int16 s2_info;
    char buf[8];
    static int32 byterev = -1;  /* Whether to byte reverse output data */

    build_output_uttfile (filename, dir, uttid, ctlspec);
    strcat (filename, ".v8_seg");       /* .v8_seg for compatibility */
    E_INFO("Writing Sphinx-II format state segmentation to: %s\n", filename);
    if ((fp = fopen (filename, "wb")) == NULL) {
        E_ERROR("fopen(%s,wb) failed\n", filename);
        return;
    }

    if (byterev < 0) {
        /* Byte ordering of host machine unknown; first figure it out */
        k = (int32) BYTE_ORDER_MAGIC;
        if (fwrite (&k, sizeof(int32), 1, fp) != 1)
            goto write_error;

        fclose (fp);
        if ((fp = fopen (filename, "rb")) == NULL) {
            E_ERROR ("fopen(%s,rb) failed\n", filename);
            return;
        }
        if (fread (buf, 1, sizeof(int32), fp) != sizeof(int32)) {
            E_ERROR ("fread(%s) failed\n", filename);
            /* Do not leak the probe handle on a short read. */
            fclose (fp);
            return;
        }
        fclose (fp);

        /* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian.  Need to byterev */
        byterev = (buf[0] == (BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0;

        /* Reopen for writing, which truncates the probe data. */
        if ((fp = fopen (filename, "wb")) == NULL) {
            E_ERROR("fopen(%s,wb) failed\n", filename);
            return;
        }
    }

    /* Write #frames */
    for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next);
    if (byterev)
        SWAP_INT32(&k);
    if (fwrite (&k, sizeof(int32), 1, fp) != 1)
        goto write_error;

    /* Write state info for each frame */
    for (; stseg; stseg = stseg->next) {
        mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos);

        s2_info = ci[0] * mdef->n_emit_state + stseg->state;
        if (stseg->start)
            s2_info |= 0x8000;
        if (byterev)
            SWAP_INT16(&s2_info);

        if (fwrite (&s2_info, sizeof(int16), 1, fp) != 1)
            goto write_error;
    }

    fclose (fp);
    return;

write_error:
    E_ERROR("fwrite(%s) failed\n", filename);
    fclose (fp);
}
コード例 #8
0
/*
 * Parse the RIFF/WAVE header of infh and leave the stream positioned at
 * the first byte of sample data.
 *
 * Forces "-input_endian" to "little", requires a PCM, single-channel,
 * 16-bit 'fmt ' chunk, and either adopts the file's sampling rate (when
 * "-samprate" is 0) or warns if it differs from "-samprate".
 *
 * Returns 0 on success, -1 on a malformed or unsupported header.
 */
static int
read_riff_header(FILE *infh)
{
    char tag[4];
    int32 dword, fmt_len;
    int16 word;

    /* RIFF files are little-endian by definition. */
    cmd_ln_set_str("-input_endian", "little");

    /* Leading chunk tag, then the total file length (ignored). */
    TRY_FREAD(tag, 1, 4, infh);
    TRY_FREAD(&dword, 4, 1, infh);

    /* Form type must be 'WAVE'. */
    TRY_FREAD(tag, 1, 4, infh);
    if (memcmp(tag, "WAVE", 4) != 0) {
        E_ERROR("This is not a WAVE file\n");
        goto error_out;
    }

    /* The next chunk must be 'fmt '. */
    TRY_FREAD(tag, 1, 4, infh);
    if (memcmp(tag, "fmt ", 4) != 0) {
        E_ERROR("Format chunk missing\n");
        goto error_out;
    }

    /* Length of the 'fmt ' chunk. */
    TRY_FREAD(&dword, 4, 1, infh);
    SWAP_INT32(&dword);
    fmt_len = dword;

    /* Data format: 1 means PCM. */
    TRY_FREAD(&word, 2, 1, infh);
    SWAP_INT16(&word);
    if (word != 1) {
        E_ERROR("WAVE file is not in PCM format\n");
        goto error_out;
    }

    /* Number of channels: only mono is accepted. */
    TRY_FREAD(&word, 2, 1, infh);
    SWAP_INT16(&word);
    if (word != 1) {
        E_ERROR("WAVE file is not single channel\n");
        goto error_out;
    }

    /* Sampling rate (finally!) */
    TRY_FREAD(&dword, 4, 1, infh);
    SWAP_INT32(&dword);
    if (cmd_ln_int32("-samprate") == 0) {
        cmd_ln_set_int32("-samprate", dword);
    }
    else if (cmd_ln_int32("-samprate") != dword) {
        E_WARN("WAVE file sampling rate %d != -samprate %d\n",
               dword, cmd_ln_int32("-samprate"));
    }

    /* Average bytes per second and block alignment are read and ignored. */
    TRY_FREAD(&dword, 4, 1, infh);
    TRY_FREAD(&word, 2, 1, infh);

    /* Bits per sample (must be 16) */
    TRY_FREAD(&word, 2, 1, infh);
    SWAP_INT16(&word);
    if (word != 16) {
        E_ERROR("WAVE file is not 16-bit\n");
        goto error_out;
    }

    /* Skip whatever extra format bytes follow the 16 parsed above. */
    if (fmt_len > 16)
        fseek(infh, fmt_len - 16, SEEK_CUR);

    /* Walk the remaining chunks until 'data' is found.  Every chunk
     * carries a 4-byte tag followed by a 4-byte length; for 'data' the
     * length is ignored, for anything else it is the distance to skip. */
    for (;;) {
        TRY_FREAD(tag, 1, 4, infh);
        TRY_FREAD(&dword, 4, 1, infh);
        if (memcmp(tag, "data", 4) == 0)
            break;
        SWAP_INT32(&dword);
        fseek(infh, dword, SEEK_CUR);
    }

    /* We are ready to rumble. */
    return 0;
error_out:
    return -1;
}
コード例 #9
0
/**
 * Load a model definition from `filename`.
 *
 * The file is first offered to bin_mdef_read_text(); a non-NULL result is
 * returned as-is.  Otherwise the file is opened in binary mode and parsed:
 * a 4-byte byte-order marker, a 4-byte format version, a 4-byte descriptor
 * length (the descriptor itself is skipped), ten 32-bit header fields, and
 * then a data region that is either memory-mapped or read into one heap
 * buffer.  m->ciname[], m->cd_tree, m->phone and m->sseq[] all point into
 * that single region.
 *
 * @param config   Configuration; when NULL, memory-mapping is attempted.
 *                 Otherwise the "-mmap" boolean decides.
 * @param filename Path of the model definition file.
 * @return The new model definition (refcnt set to 1), or NULL when the
 *         file cannot be opened, a header field cannot be read, or the
 *         format version exceeds BIN_MDEF_FORMAT_VERSION.  A short read
 *         of the data region is reported through E_FATAL instead.
 */
bin_mdef_t *
bin_mdef_read(cmd_ln_t *config, const char *filename)
{
    bin_mdef_t *m;
    FILE *fh;
    size_t tree_start;
    int32 val, i, swap, pos, end;
    int32 *sseq_size;
    int do_mmap;

    /* Try to read it as text first. */
    if ((m = bin_mdef_read_text(config, filename)) != NULL)
        return m;

    E_INFO("Reading binary model definition: %s\n", filename);
    if ((fh = fopen(filename, "rb")) == NULL)
        return NULL;

    /* First word is the byte-order marker; it decides whether every
     * later multi-byte value must be swapped. */
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
                       filename);
        return NULL;
    }
    swap = 0;
    if (val == BIN_MDEF_OTHER_ENDIAN) {
        swap = 1;
        E_INFO("Must byte-swap %s\n", filename);
    }
    /* Format version: files newer than this library are rejected. */
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    if (val > BIN_MDEF_FORMAT_VERSION) {
        E_ERROR("File format version %d for %s is newer than library\n",
                val, filename);
        fclose(fh);
        return NULL;
    }
    /* Length of the format descriptor that follows. */
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    /* Skip format descriptor. */
    /* NOTE(review): the fseek() result is not checked. */
    fseek(fh, val, SEEK_CUR);

    /* Finally allocate it. */
    m = ckd_calloc(1, sizeof(*m));
    m->refcnt = 1;

    /* Check these, to make gcc/glibc shut up. */
    /* Reads one 32-bit field into dest, swapping if needed; on a short
     * read it closes the file, frees m and returns NULL from the
     * enclosing function.  The macro is not #undef'd in this block. */
#define FREAD_SWAP32_CHK(dest)                                          \
    if (fread((dest), 4, 1, fh) != 1) {                                 \
        fclose(fh);                                                     \
        ckd_free(m);                                                    \
        E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
        return NULL;                                                    \
    }                                                                   \
    if (swap) SWAP_INT32(dest);
    
    FREAD_SWAP32_CHK(&m->n_ciphone);
    FREAD_SWAP32_CHK(&m->n_phone);
    FREAD_SWAP32_CHK(&m->n_emit_state);
    FREAD_SWAP32_CHK(&m->n_ci_sen);
    FREAD_SWAP32_CHK(&m->n_sen);
    FREAD_SWAP32_CHK(&m->n_tmat);
    FREAD_SWAP32_CHK(&m->n_sseq);
    FREAD_SWAP32_CHK(&m->n_ctx);
    FREAD_SWAP32_CHK(&m->n_cd_tree);
    /* The value read here is replaced by a name lookup at the end of
     * this function. */
    FREAD_SWAP32_CHK(&m->sil);

    /* CI names are first in the file. */
    m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));

    /* Decide whether to read in the whole file or mmap it. */
    do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
    /* NOTE(review): this warning is emitted whenever the file needs
     * swapping, whether or not memory-mapping was actually requested. */
    if (swap) {
        E_WARN("-mmap specified, but mdef is other-endian.  Will not memory-map.\n");
        do_mmap = FALSE;
    } 
    /* Actually try to mmap it. */
    if (do_mmap) {
        m->filemap = mmio_file_read(filename);
        /* Fall back to reading into memory if mapping failed. */
        if (m->filemap == NULL)
            do_mmap = FALSE;
    }
    /* pos marks the start of the data region, right after the header. */
    pos = ftell(fh);
    if (do_mmap) {
        /* Get the base pointer from the memory map. */
        m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
        /* Success! */
        m->alloc_mode = BIN_MDEF_ON_DISK;
    }
    else {
        /* Read everything into memory. */
        m->alloc_mode = BIN_MDEF_IN_MEMORY;
        /* Measure the remaining file size by seeking to the end. */
        fseek(fh, 0, SEEK_END);
        end = ftell(fh);
        fseek(fh, pos, SEEK_SET);
        m->ciname[0] = ckd_malloc(end - pos);
        if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
            E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename);
    }

    /* The names are stored back to back as NUL-terminated strings;
     * each pointer starts right after the previous terminator. */
    for (i = 1; i < m->n_ciphone; ++i)
        m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;

    /* Skip past the padding. */
    /* tree_start is the offset just past the last name, rounded up to
     * a multiple of 4. */
    tree_start =
        m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
    tree_start = (tree_start + 3) & ~3;
    m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
    if (swap) {
        for (i = 0; i < m->n_cd_tree; ++i) {
            SWAP_INT16(&m->cd_tree[i].ctx);
            SWAP_INT16(&m->cd_tree[i].n_down);
            SWAP_INT32(&m->cd_tree[i].c.down);
        }
    }
    /* The phone table immediately follows the n_cd_tree tree entries. */
    m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
    if (swap) {
        for (i = 0; i < m->n_phone; ++i) {
            SWAP_INT32(&m->phone[i].ssid);
            SWAP_INT32(&m->phone[i].tmat);
        }
    }
    /* After the phone table comes one int32 count, then that many
     * 16-bit senone-sequence entries. */
    sseq_size = (int32 *) (m->phone + m->n_phone);
    if (swap)
        SWAP_INT32(sseq_size);
    m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
    m->sseq[0] = (uint16 *) (sseq_size + 1);
    if (swap) {
        for (i = 0; i < *sseq_size; ++i)
            SWAP_INT16(m->sseq[0] + i);
    }
    if (m->n_emit_state) {
        /* Fixed topology: every sequence has n_emit_state entries. */
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
    }
    else {
        /* Variable topology: per-sequence lengths are stored as bytes
         * right after the sequence data. */
        m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
    }

    /* Now build the CD-to-CI mappings using the senone sequences.
     * This is the only really accurate way to do it, though it is
     * still inaccurate in the case of heterogeneous topologies or
     * cross-state tying. */
    m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
    m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));

    /* Default mappings (identity, none) */
    for (i = 0; i < m->n_ci_sen; ++i)
        m->cd2cisen[i] = i;
    for (; i < m->n_sen; ++i)
        m->cd2cisen[i] = -1;
    for (i = 0; i < m->n_sen; ++i)
        m->sen2cimap[i] = -1;
    for (i = 0; i < m->n_phone; ++i) {
        int32 j, ssid = m->phone[i].ssid;

        for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
            int s = bin_mdef_sseq2sen(m, ssid, j);
            int ci = bin_mdef_pid2ci(m, i);
            /* Take the first one and warn if we have cross-state tying. */
            if (m->sen2cimap[s] == -1)
                m->sen2cimap[s] = ci;
            if (m->sen2cimap[s] != ci)
                E_WARN
                    ("Senone %d is shared between multiple base phones\n",
                     s);

            /* NOTE(review): the comparison is '>' rather than '>=';
             * confirm that j equal to the CI state count is meant to
             * take the else branch. */
            if (j > bin_mdef_n_emit_state_phone(m, ci))
                E_WARN("CD phone %d has fewer states than CI phone %d\n",
                       i, ci);
            else
                m->cd2cisen[s] =
                    bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
        }
    }

    /* Set the silence phone. */
    m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE);

    E_INFO
        ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
         m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
         m->n_ci_sen, m->n_sen, m->n_sseq);
    fclose(fh);
    return m;
}
コード例 #10
0
/*********************************************************************
   FUNCTION: fe_process_utt
   PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, mfcc_t ***cep_block,
               int32 *nframes
   RETURNS: FE_SUCCESS, FE_MEM_ALLOC_ERROR when an allocation fails, or
   the last non-success status reported by fe_frame_to_fea()
   DESCRIPTION: processes the given speech data and returns
   features. will prepend overflow data from last call and store new
   overflow data within the FE.  On success *cep_block receives the
   newly allocated feature matrix (NULL when no complete frame was
   available) and *nframes the number of frames in it; neither output
   is written when FE_MEM_ALLOC_ERROR is returned.  The caller's
   sample buffer is modified in place when byte-swapping is enabled,
   and may also be modified by dither.
**********************************************************************/
int32
fe_process_utt(fe_t * FE, int16 * spch, int32 nsamps,
               mfcc_t *** cep_block, int32 * nframes)
{
    int32 frame_start, frame_count = 0, whichframe = 0;
    int32 i, spbuf_len, offset = 0;
    frame_t *spbuf = NULL, *fr_data = NULL;
    int16 *tmp_spch = spch;
    mfcc_t **cep = NULL;
    int32 return_value = FE_SUCCESS;
    int32 frame_return_value;

    /* Added byte-swapping for Endian-ness compatibility */
    if (FE->swap)
        for (i = 0; i < nsamps; i++)
            SWAP_INT16(&spch[i]);

    /* are there enough samples to make at least 1 frame? */
    if (nsamps + FE->NUM_OVERFLOW_SAMPS >= FE->FRAME_SIZE) {

        /* if there are previous samples, pre-pend them to input speech samps */
        if ((FE->NUM_OVERFLOW_SAMPS > 0)) {

            if ((tmp_spch =
                 (int16 *) malloc(sizeof(int16) *
                                  (FE->NUM_OVERFLOW_SAMPS +
                                   nsamps))) == NULL) {
                E_WARN("memory alloc failed in fe_process_utt()\n");
                return FE_MEM_ALLOC_ERROR;
            }
            memcpy(tmp_spch, FE->OVERFLOW_SAMPS,
                   FE->NUM_OVERFLOW_SAMPS * (sizeof(int16)));
            memcpy(tmp_spch + FE->NUM_OVERFLOW_SAMPS, spch,
                   nsamps * (sizeof(int16)));
            nsamps += FE->NUM_OVERFLOW_SAMPS;
            FE->NUM_OVERFLOW_SAMPS = 0; /*reset overflow samps count */
        }
        /* compute how many complete frames  can be processed and which samples correspond to those samps */
        frame_count = 0;
        for (frame_start = 0;
             frame_start + FE->FRAME_SIZE <= nsamps;
             frame_start += FE->FRAME_SHIFT)
            frame_count++;

        spbuf_len = (frame_count - 1) * FE->FRAME_SHIFT + FE->FRAME_SIZE;

        /* Allocate the scratch buffers before the output matrix so that
           every failure path can unwind with plain free(). */
        if ((spbuf =
             (frame_t *) calloc(spbuf_len, sizeof(frame_t))) == NULL) {
            E_WARN("memory alloc failed in fe_process_utt()\n");
            goto alloc_failed;
        }

        fr_data = (frame_t *) calloc(FE->FRAME_SIZE, sizeof(frame_t));
        if (fr_data == NULL) {
            E_WARN("memory alloc failed in fe_process_utt()\n");
            goto alloc_failed;
        }

        if ((cep =
             (mfcc_t **) fe_create_2d(frame_count + 1,
                                      FE->FEATURE_DIMENSION,
                                      sizeof(mfcc_t))) == NULL) {
            /* sizeof yields size_t; cast so it matches the %d specifier. */
            E_WARN
                ("memory alloc for cep failed in fe_process_utt()\n\tfe_create_2d(%ld,%d,%d)\n",
                 (long int) (frame_count + 1),
                 FE->FEATURE_DIMENSION, (int) sizeof(mfcc_t));
            goto alloc_failed;
        }

        /* Add dither, if requested */
        if (FE->dither) {
            fe_dither(tmp_spch, spbuf_len);
        }

        /* pre-emphasis if needed, convert from int16 to float64 */
        if (FE->PRE_EMPHASIS_ALPHA != 0.0) {
            fe_pre_emphasis(tmp_spch, spbuf, spbuf_len,
                            FE->PRE_EMPHASIS_ALPHA, FE->PRIOR);
        }
        else {
            fe_short_to_frame(tmp_spch, spbuf, spbuf_len);
        }

        /* frame based processing - let's make some cepstra... */
        for (whichframe = 0; whichframe < frame_count; whichframe++) {

            for (i = 0; i < FE->FRAME_SIZE; i++)
                fr_data[i] = spbuf[whichframe * FE->FRAME_SHIFT + i];

            fe_hamming_window(fr_data, FE->HAMMING_WINDOW, FE->FRAME_SIZE, FE->remove_dc);

            frame_return_value =
                fe_frame_to_fea(FE, fr_data, cep[whichframe]);

            /* Keep going on a per-frame error, but remember it. */
            if (FE_SUCCESS != frame_return_value) {
                return_value = frame_return_value;
            }
        }
        /* done making cepstra */


        /* assign samples which don't fill an entire frame to FE overflow buffer for use on next pass */
        if ((offset = ((frame_count) * FE->FRAME_SHIFT)) < nsamps) {
            memcpy(FE->OVERFLOW_SAMPS, tmp_spch + offset,
                   (nsamps - offset) * sizeof(int16));
            FE->NUM_OVERFLOW_SAMPS = nsamps - offset;
            FE->PRIOR = tmp_spch[offset - 1];
            assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE);
        }

        /* tmp_spch is a private copy only when overflow was prepended. */
        if (spch != tmp_spch)
            free(tmp_spch);

        free(spbuf);
        free(fr_data);
    }

    /* if not enough total samps for a single frame, append new samps to
       previously stored overlap samples */
    else {
        memcpy(FE->OVERFLOW_SAMPS + FE->NUM_OVERFLOW_SAMPS,
               tmp_spch, nsamps * (sizeof(int16)));
        FE->NUM_OVERFLOW_SAMPS += nsamps;
        assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE);
        frame_count = 0;
    }

    *cep_block = cep;           /* MLS */
    *nframes = frame_count;
    return return_value;

alloc_failed:
    /* Release whatever was obtained before the failing allocation. */
    free(fr_data);
    free(spbuf);
    if (spch != tmp_spch)
        free(tmp_spch);
    return FE_MEM_ALLOC_ERROR;
}
コード例 #11
0
/*
 * Feed a block of raw 16-bit samples to the live decoder.
 *
 * The samples are byte-swapped in place when _decoder->swap is set, run
 * through the front end, converted to feature vectors and passed on to
 * utt_decode_block().  An utterance is treated as starting when no
 * frames have been entered yet; a non-zero end_utt flushes the front
 * end with fe_end_utt().
 *
 * _decoder     decoder state; must not be NULL
 * samples      sample buffer (may be modified in place)
 * num_samples  number of samples in the buffer
 * end_utt      non-zero when this is the last block of the utterance
 */
void
ld_process_raw_impl(live_decoder_t * _decoder,
                    int16 * samples, int32 num_samples, int32 end_utt)
{
    float32 dummy_frame[MAX_CEP_LEN];
    float32 **frames = 0;
    int32 num_frames = 0;
    int32 num_features = 0;
    int32 begin_utt;
    int32 return_value;
    int i;

    /* Validate the pointer before the first dereference. */
    assert(_decoder != NULL);
    begin_utt = _decoder->num_frames_entered == 0;

    if (begin_utt) {
        fe_start_utt(_decoder->fe);
    }

    if (_decoder->swap) {
        for (i = 0; i < num_samples; i++) {
            SWAP_INT16(samples + i);
        }
    }

    return_value =
        fe_process_utt(_decoder->fe, samples, num_samples, &frames,
                       &num_frames);

    if (end_utt) {
        return_value = fe_end_utt(_decoder->fe, dummy_frame, &num_frames);
        if (num_frames != 0) {
            /* ARCHAN: If num_frames !=0, assign this last ending frame to
               frames again.  The computation will then be correct.  Should
               clean up the finite state logic in fe_interface layer.
             */
            /* Release the block returned by fe_process_utt() before the
               pointer is overwritten, otherwise it is leaked. */
            if (frames != NULL) {
                ckd_free_2d((void **) frames);
            }
            frames =
                (float32 **) ckd_calloc_2d(1, _decoder->fe->NUM_CEPSTRA,
                                           sizeof(float32));
            memcpy(frames[0], dummy_frame,
                   _decoder->fe->NUM_CEPSTRA * sizeof(float32));
        }
    }

    if (FE_ZERO_ENERGY_ERROR == return_value) {
        E_WARN("Zero energy frame(s). Consider using dither\n");
    }

    if (num_frames > 0) {
        num_features = feat_s2mfc2feat_block(kbcore_fcb(_decoder->kbcore),
                                             frames,
                                             num_frames,
                                             begin_utt,
                                             end_utt, _decoder->features);
        _decoder->num_frames_entered += num_frames;
    }

    if (num_features > 0) {
        utt_decode_block(_decoder->features,
                         num_features,
                         &_decoder->num_frames_decoded, &_decoder->kb);
    }

    if (frames != NULL) {
        ckd_free_2d((void **) frames);
    }
}
コード例 #12
0
//	Loads an uncompressed TGA image from the given file and hands its pixel
//	data to LoadImage to create the texture. Returns the cached texture
//	name when the path is already in the texture cache, or 0 when the file
//	cannot be opened, is truncated, or is not 8/24/32 bit.
GLuint LoadTexture(const char* const filename)
{
	#pragma pack(1)
	struct TGAHEADER
	{
		char identsize;					// Size of ID field that follows header (0)
		char colorMapType;				// 0 = None, 1 = palette
		char imageType;					// 0 = none, 1 = indexed, 2 = rgb, 3 = grey, +8=rle
		unsigned short colorMapStart;	// First color map entry
		unsigned short colorMapLength;	// Number of colors
		unsigned char colorMapBits;		// bits per palette entry
		unsigned short xstart;			// image x origin
		unsigned short ystart;			// image y origin
		unsigned short width;			// width in pixels
		unsigned short height;			// height in pixels
		char bits;						// bits per pixel (8 16, 24, 32)
		char descriptor;				// image descriptor
	};
	#pragma pack(8)

	char fullPathName[2048];
	dGetWorkingFileName (filename, fullPathName);
	TextureCache& cache = TextureCache::GetChache();
	GLuint texture = cache.GetTexture(fullPathName);
	if (!texture) {

		FILE* const pFile = fopen (fullPathName, "rb");
		if(pFile == NULL) {
			return 0;
		}

		//dAssert (sizeof (TGAHEADER) == 18);
		// Read in header (binary) sizeof(TGAHEADER) = 18.
		// A short read would leave the header uninitialised, so reject it.
		TGAHEADER tgaHeader;		// TGA file header
		if(fread(&tgaHeader, 18, 1, pFile) != 1) {
			fclose(pFile);
			return 0;
		}

		// Do byte swap for big vs little endian
		tgaHeader.colorMapStart = SWAP_INT16(tgaHeader.colorMapStart);
		tgaHeader.colorMapLength = SWAP_INT16(tgaHeader.colorMapLength);
		tgaHeader.xstart = SWAP_INT16(tgaHeader.xstart);
		tgaHeader.ystart = SWAP_INT16(tgaHeader.ystart);
		tgaHeader.width = SWAP_INT16(tgaHeader.width);
		tgaHeader.height = SWAP_INT16(tgaHeader.height);

		// Get width, height, and depth of texture
		int width = tgaHeader.width;
		int height = tgaHeader.height;
		short sDepth = tgaHeader.bits / 8;
		// NOTE(review): this assertion rejects 8-bit images in debug
		// builds even though the check below accepts them.
		dAssert ((sDepth == 3) || (sDepth == 4));

		// Put some validity checks here. Very simply, I only understand
		// or care about 8, 24, or 32 bit targa's.
		if(tgaHeader.bits != 8 && tgaHeader.bits != 24 && tgaHeader.bits != 32) {
			fclose(pFile);
			return 0;
		}


		// Calculate size of image buffer
		unsigned lImageSize = width * height * sDepth;

		// Allocate memory and check for success
		char* const pBits = new char [width * height * 4];
		if(pBits == NULL) {
			fclose(pFile);
			return 0;
		}

		// Read in the bits
		// Check for read error. This should catch RLE or other
		// weird formats that I don't want to recognize
		if(fread(pBits, lImageSize, 1, pFile) != 1)  {
			fclose(pFile);
			delete[] pBits;
			return 0;
		}

		// Map bytes-per-pixel onto the image format passed to LoadImage.
		TextureImageFormat format = m_rgb;
		switch(sDepth)
		{
			case 1:
				format = m_luminace;
				break;

			case 3:
				format = m_rgb;
				break;

			case 4:
				format = m_rgba;
				break;
		};

		texture = LoadImage(fullPathName, pBits, tgaHeader.width, tgaHeader.height, format);

		// Done with File
		fclose(pFile);
		delete[] pBits;
	}
	return texture;
}
コード例 #13
0
ファイル: viterbi.c プロジェクト: Ankit77/cmusphinx
/*
 * Backtrace through the alignment lattice and write a Sphinx-II style
 * state segmentation to `filename`.
 *
 * The file holds n_obs as one 16-bit word followed by n_obs 16-bit
 * words, one per frame, each taken from state_seq[].mixw.  Every word
 * is passed through SWAP_INT16 before it is written.
 * NOTE(review): n_obs is stored in a uint16, so counts above 65535 are
 * truncated in the header.
 *
 * Returns S3_SUCCESS, or S3_ERROR when there are no observations, the
 * final state was not reached, the file cannot be opened, or a write
 * or close fails.
 */
int32
write_s2stseg(const char *filename,
	      state_t *state_seq,
	      uint32 **active_astate,
	      uint32 *n_active_astate,
	      uint32 n_state,
	      uint32 n_obs,
	      uint32 **bp)
{
    FILE *fh;
    uint32 q;
    int32 t;
    uint16 word, *stseg;
    int32 status = S3_SUCCESS;

    /* With no observations, indexing [n_obs-1] below would wrap around. */
    if (n_obs == 0) {
	E_ERROR("Failed to write state segmentation: no observations\n");
	return S3_ERROR;
    }

    /* Backtrace and build a phone segmentation. */
    /* Find the non-emitting ending state */
    for (q = 0; q < n_active_astate[n_obs-1]; ++q) {
	if (active_astate[n_obs-1][q] == n_state-1)
	    break;
    }
    if (q == n_active_astate[n_obs-1]) {
	E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n");
	return S3_ERROR;
    }

    if ((fh = fopen(filename, "wb")) == NULL) {
	return S3_ERROR;
    }

    /* Header: the frame count as a single 16-bit word. */
    word = n_obs;
    SWAP_INT16(&word);
    if (fwrite(&word, 2, 1, fh) != 1) {
	E_ERROR("Failed to write frame count to %s\n", filename);
	fclose(fh);
	return S3_ERROR;
    }

    stseg = ckd_calloc(n_obs, sizeof(uint16));

    for (t = n_obs-1; t >= 0; --t) {
	uint32 j;

	j = active_astate[t][q];

	/* Follow any non-emitting states at time t first. */
	while (state_seq[j].mixw == TYING_NON_EMITTING) {
	    j = active_astate[t][bp[t][q]];
	    q = bp[t][q];
	}

	/* mixw = senone (we hope!) */
	stseg[t] = state_seq[j].mixw;
	SWAP_INT16(&stseg[t]);

	/* Backtrace. */
	if (t > 0) {
	    q = bp[t][q];
	}
    }

    /* Report short writes instead of claiming success. */
    if (fwrite(stseg, 2, n_obs, fh) != n_obs) {
	E_ERROR("Failed to write state segmentation to %s\n", filename);
	status = S3_ERROR;
    }
    ckd_free(stseg);
    /* fclose() flushes buffered data, so its result matters too. */
    if (fclose(fh) != 0) {
	E_ERROR("Failed to close %s\n", filename);
	status = S3_ERROR;
    }
    return status;
}