Exemplo n.º 1
0
static int32
lm_read_dump_ug(lm_t * lm, const char *file)
{
    int32 i;

    assert(lm->n_ug > 0);

    /* Read ug; remember sentinel ug at the end! */
    lm->ug = (ug_t *) ckd_calloc(lm->n_ug + 1, sizeof(ug_t));
    if (fread(lm->ug, sizeof(ug_t), lm->n_ug + 1, lm->fp) !=
        (size_t) (lm->n_ug + 1)) {
        E_ERROR("unigram fread(%s) failed\n", file);
        return LM_FAIL;
        /*        E_FATAL("fread(%s) failed\n", file); */
    }

    if (lm->byteswap) {
        for (i = 0; i <= lm->n_ug; i++) {
            SWAP_INT32(&(lm->ug[i].prob.l));
            SWAP_INT32(&(lm->ug[i].bowt.l));
            SWAP_INT32(&(lm->ug[i].firstbg));
        }
    }
    E_INFO("Read %8d unigrams [in memory]\n", lm->n_ug);
    return LM_SUCCESS;
}
Exemplo n.º 2
0
static void
read_dmp_weight_array(FILE * fp, logmath_t * lmath, uint8 do_swap,
                      int32 counts, ngram_raw_t * raw_ngrams,
                      int weight_idx)
{
    int32 i, k;
    dmp_weight_t *tmp_weight_arr;

    fread(&k, sizeof(k), 1, fp);
    if (do_swap)
        SWAP_INT32(&k);
    tmp_weight_arr =
        (dmp_weight_t *) ckd_calloc(k, sizeof(*tmp_weight_arr));
    fread(tmp_weight_arr, sizeof(*tmp_weight_arr), k, fp);
    for (i = 0; i < k; i++) {
        if (do_swap)
            SWAP_INT32(&tmp_weight_arr[i].l);
        /* Convert values to log. */
        tmp_weight_arr[i].f =
            logmath_log10_to_log_float(lmath, tmp_weight_arr[i].f);
    }
    //replace indexes with real probs in raw bigrams
    for (i = 0; i < counts; i++) {
        raw_ngrams[i].weights[weight_idx] =
            tmp_weight_arr[(int) raw_ngrams[i].weights[weight_idx]].f;
    }
    ckd_free(tmp_weight_arr);
}
Exemplo n.º 3
0
static int
acmod_log_mfc(acmod_t *acmod,
              mfcc_t **cep, int n_frames)
{
    int i, n;
    int32 *ptr = (int32 *)cep[0];

    n = n_frames * feat_cepsize(acmod->fcb);
    /* Swap bytes. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    /* Write features. */
    if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
        E_ERROR_SYSTEM("Failed to write %d values to log file", n);
    }

    /* Swap them back. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    return 0;
}
Exemplo n.º 4
0
/**
 * "Detect" Sphinx MFCC files, meaning verify their lousy headers, and
 * set up some parameters from the config object.
 *
 * @return TRUE, or -1 on error.
 */
static int
detect_sphinx_mfc(sphinx_wave2feat_t *wtf)
{
    FILE *fh;
    int32 len;
    long flen;

    if ((fh = fopen(wtf->infile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", wtf->infile);
        return -1;
    }
    if (fread(&len, 4, 1, fh) != 1) {
        E_ERROR_SYSTEM("Failed to read header from %s\n", wtf->infile);
        fclose(fh);
        return -1;
    }
    fseek(fh, 0, SEEK_END);
    flen = ftell(fh);

    /* figure out whether to byteswap */
    flen = (flen / 4) - 1;
    if (flen != len) {
        /* First make sure this is an endianness problem, otherwise fail. */
        SWAP_INT32(&len);
        if (flen != len) {
            SWAP_INT32(&len);
            E_ERROR("Mismatch in header/file lengths: 0x%08x vs 0x%08x\n",
                    len, flen);
            return -1;
        }
        /* Set the input endianness to the opposite of the machine endianness... */
        cmd_ln_set_str_r(wtf->config, "-input_endian",
                         (0 == strcmp("big", cmd_ln_str_r(wtf->config, "-mach_endian"))
                          ? "little" : "big"));
    }
    
    fseek(fh, 4, SEEK_SET);
    wtf->infh = fh;
    if (cmd_ln_boolean_r(wtf->config, "-spec2cep")) {
        wtf->in_veclen = cmd_ln_int32_r(wtf->config, "-nfilt");
    }
    else if (cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
        wtf->in_veclen = cmd_ln_int32_r(wtf->config, "-ncep");
        wtf->veclen = cmd_ln_int32_r(wtf->config, "-nfilt");
    }
    else {
        /* Should not happen. */
        E_ERROR("Sphinx MFCC file reading requested but -spec2cep/-cep2spec not given\n");
        assert(FALSE);
    }
            
    return TRUE;
}
Exemplo n.º 5
0
/*
  The only function which doesn't require switching in lm_read_dump
 */
static int32
lm_read_dump_tg_segbase(lm_t * lm, const char *file)
{
    int i, k;
    if (lm->n_tg > 0) {
        /* Trigram seg table size */
        k = lm_fread_int32(lm);
        if (k != (lm->n_bg + 1) / lm->bg_seg_sz + 1) {
            E_ERROR("Bad trigram seg table size: %d\n", k);
            return LM_FAIL;
        }

        /* Allocate and read trigram seg table */
        lm->tg_segbase = (int32 *) ckd_calloc(k, sizeof(int32));
        if (fread(lm->tg_segbase, sizeof(int32), k, lm->fp) != (size_t) k) {
            E_ERROR("fread(%s) failed\n", file);
            return LM_FAIL;
        }
        if (lm->byteswap) {
            for (i = 0; i < k; i++)
                SWAP_INT32(&(lm->tg_segbase[i]));
        }
        E_INFO("%8d trigram segtable entries (%d segsize)\n", k,
               lm->bg_seg_sz);
    }
    return LM_SUCCESS;
}
Exemplo n.º 6
0
static int32
lm_read_dump_tgprob(lm_t * lm, const char *file, int32 is32bits)
{
    int32 i;
    uint32 upper_limit;

    upper_limit = is32bits ? LM_SPHINX_CONSTANT : LM_LEGACY_CONSTANT;

    if (lm->n_tg > 0) {
        lm->n_tgprob = lm_fread_int32(lm);
        if ((lm->n_tgprob <= 0) || (lm->n_tgprob > upper_limit)) {
            E_ERROR("Bad trigram bowt table size: %d\n", lm->n_tgprob);
            return LM_FAIL;
        }

        /* Allocate and read trigram bowt table */
        lm->tgprob = (lmlog_t *) ckd_calloc(lm->n_tgprob, sizeof(lmlog_t));
        if (fread(lm->tgprob, sizeof(lmlog_t), lm->n_tgprob, lm->fp) !=
            (size_t) lm->n_tgprob) {
            E_ERROR("fread(%s) failed\n", file);
            return LM_FAIL;
        }
        if (lm->byteswap) {
            for (i = 0; i < lm->n_tgprob; i++)
                SWAP_INT32(&(lm->tgprob[i].l));
        }
        E_INFO("%8d trigram prob entries\n", lm->n_tgprob);
    }

    return LM_SUCCESS;
}
	void SWAP_FLOAT32_ARRAY (void* const array, dInt32 count)
	{
		dInt32* const ptr = (dInt32*) array;
		count /= sizeof (dInt32);
		for (dInt32 i = 0; i < count; i ++) {
			dInt32 x;
			x = SWAP_INT32 (ptr[i]);
			ptr[i] = x;
		}
	}
Exemplo n.º 8
0
static int32
lm_fread_int32(lm_t * lm)
{
    int32 val;

    if (fread(&val, sizeof(int32), 1, lm->fp) != 1)
        E_FATAL("fread failed\n");
    if (lm->byteswap)
        SWAP_INT32(&val);
    return (val);
}
Exemplo n.º 9
0
// This function checks whether the passed record is a header record, and,
// if it is, writes it to the header buffer.
// It also checks the run type for RHDR records.  It returns 0 if the record
// was not a RHDR, and the run type if it was.
uint32_t FillHeaderBuffer(nZDAB* const zrec){
  uint32_t runtype = 0;
  for(int i=0; i<headertypes; i++){
    if(zrec->bank_name == Headernames[i]){
      memset(header[i], 0, NWREC);
      // Add 9 words for the length of the nZDAB header itself
      // Copy the data to buffer in native format
      uint32_t recLen = zrec->data_words+9;
      memcpy(header[i], zrec, recLen*sizeof(uint32_t));
      // For RHDR's pull out run type to return
      if(i==0){
        RunRecord* rhdr = (RunRecord*) (zrec+1);
        SWAP_INT32(rhdr, 9);
        runtype = rhdr->RunMask;
        fprintf(stderr, "runtype: %d\n", runtype);
        SWAP_INT32(rhdr, 9);
      }
    }
  }
  return runtype;
}
Exemplo n.º 10
0
/**   
      20060303: ARCHAN

      lm_read_dump_header will read in the DMP format. What it will do
      is to compare the value read in with the darpa_hdr ("Darpa
      Trigram LM").  If it matches, that means there is no byte
      swap. If it doesn't, we will try to swap the value and match the
      header again.  If it still doesn't work, that means something is
      wrong. (e.g. Format problem of the DMP file).  

      This process will also allow us to know the byte-order of the
      DMP file. Swapping could then automatically done in the code. 
 */
static int32
lm_read_dump_header(lm_t * lm,             /**< The LM */
                    const char *file              /**< The file we are reading */
    )
{
    int32 k;
    char str[1024];

    /* Standard header string-size; set byteswap flag based on this */
    if (fread(&k, sizeof(int32), 1, lm->fp) != 1)
        E_FATAL("fread(%s) failed\n", file);

    if ((size_t) k == strlen(darpa_hdr) + 1)
        lm->byteswap = 0;
    else {
        SWAP_INT32(&k);
        if ((size_t) k == strlen(darpa_hdr) + 1)
            lm->byteswap = 1;
        else {
            SWAP_INT32(&k);
            E_INFO("Bad magic number: %d(%08x), not an LM dumpfile??\n", k,
                   k);
            return LM_FAIL;
        }
    }

    /* Read and verify standard header string */
    if (fread(str, sizeof(char), k, lm->fp) != (size_t) k) {
        E_ERROR("fread(%s) failed\n", file);
        return LM_FAIL;
    }
    if (strncmp(str, darpa_hdr, k) != 0) {
        E_ERROR("Bad header\n");
        return LM_FAIL;
    }

    return LM_SUCCESS;

}
Exemplo n.º 11
0
/**
 * Output HTK format header.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;
    /* Same file size thing as in Sphinx files (I think) */
    if (swap) SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample period in 100ns units. */
    samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (swap) SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap) SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;
    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK; /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O; /* MFCC + CEP0 (note reordering...) */
    if (swap) SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
Exemplo n.º 12
0
int32
swap_check(FILE *fp)
{
    uint32 magic;
    uint32 ret = -1;

    if (fread_retry(&magic, sizeof(uint32), 1, fp) != 1) {
	E_ERROR("error while reading bo_magic\n");

	ret = -1;
	
	return ret;
    }

    if (magic != BYTE_ORDER_MAGIC) {
	/* either need to swap or got bogus magic number */
	
	SWAP_INT32(&magic);
	
	if (magic == BYTE_ORDER_MAGIC) {
	    ret = 1;
	}
	else {
	    /* could not get magic number by swapping, so it is bogus */
	    E_ERROR("Expected to read 0x%x (or byte permuted) byte order indicator.  Instead read 0x%x\n",
		    BYTE_ORDER_MAGIC, SWAP_INT32(&magic));
	    
	    ret = -1;
	}
    }
    else {
	/* BYTE_ORDER_MAGIC was read; so no need to swap */
	ret = 0;
    }

    return ret;
}
Exemplo n.º 13
0
int
acmod_end_utt(acmod_t *acmod)
{
    int32 nfr = 0;

    acmod->state = ACMOD_ENDED;
    if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
        int inptr;
        /* Where to start writing them (circular buffer) */
        inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
        /* nfr is always either zero or one. */
        fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
        acmod->n_mfc_frame += nfr;
        
        /* Process whatever's left, and any leadout or update stats if needed. */
        if (nfr)
            nfr = acmod_process_mfcbuf(acmod);
        else
            feat_update_stats(acmod->fcb);
    }
    if (acmod->mfcfh) {
        long outlen;
        int32 rv;
        outlen = (ftell(acmod->mfcfh) - 4) / 4;
        if (!WORDS_BIGENDIAN)
            SWAP_INT32(&outlen);
        /* Try to seek and write */
        if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
            fwrite(&outlen, 4, 1, acmod->mfcfh);
        }
        fclose(acmod->mfcfh);
        acmod->mfcfh = NULL;
    }
    if (acmod->rawfh) {
        fclose(acmod->rawfh);
        acmod->rawfh = NULL;
    }

    if (acmod->senfh) {
        fclose(acmod->senfh);
        acmod->senfh = NULL;
    }

    return nfr;
}
Exemplo n.º 14
0
int
read_cep(char const *file, float ***cep, int *numframes, int cepsize)
{
    FILE *fp;
    int n_float;
    struct stat statbuf;
    int i, n, byterev, sf, ef;
    float32 **mfcbuf;

    if (stat_retry(file, &statbuf) < 0) {
        printf("stat(%s) failed\n", file);
        return IO_ERR;
    }

    if ((fp = fopen(file, "rb")) == NULL) {
        printf("fopen(%s, rb) failed\n", file);
        return IO_ERR;
    }

    /* Read #floats in header */
    if (fread(&n_float, sizeof(int), 1, fp) != 1) {
        fclose(fp);
        return IO_ERR;
    }

    /* Check if n_float matches file size */
    byterev = FALSE;
    if ((int) (n_float * sizeof(float) + 4) != statbuf.st_size) {
        n = n_float;
        SWAP_INT32(&n);

        if ((int) (n * sizeof(float) + 4) != statbuf.st_size) {
            printf
                ("Header size field: %d(%08x); filesize: %d(%08x)\n",
                 n_float, n_float, (int) statbuf.st_size,
                 (int) statbuf.st_size);
            fclose(fp);
            return IO_ERR;
        }

        n_float = n;
        byterev = TRUE;
    }
    if (n_float <= 0) {
        printf("Header size field: %d\n", n_float);
        fclose(fp);
        return IO_ERR;
    }

    /* n = #frames of input */
    n = n_float / cepsize;
    if (n * cepsize != n_float) {
        printf("Header size field: %d; not multiple of %d\n",
               n_float, cepsize);
        fclose(fp);
        return IO_ERR;
    }
    sf = 0;
    ef = n;

    mfcbuf = (float **) ckd_calloc_2d(n, cepsize, sizeof(float32));

    /* Read mfc data and byteswap if necessary */
    n_float = n * cepsize;
    if ((int) fread(mfcbuf[0], sizeof(float), n_float, fp) != n_float) {
        printf("Error reading mfc data\n");
        fclose(fp);
        return IO_ERR;
    }
    if (byterev) {
        for (i = 0; i < n_float; i++)
            SWAP_FLOAT32(&(mfcbuf[0][i]));
    }
    fclose(fp);

    *numframes = n;
    *cep = mfcbuf;
    return IO_SUCCESS;
}
Exemplo n.º 15
0
int32 cep_read_bin (float32 **buf, int32 *len, char const *file)
{
  int32 fd, floatCount, floatBytes, readBytes;
  int32 byteReverse = FALSE;
  int32 i, cnt;
  struct stat st_buf;

#ifdef WIN32
  fd = open(file, O_RDONLY|O_BINARY, 0644);
#else
  fd = open(file, O_RDONLY, 0644);
#endif

  if (fd < 0) {
    E_ERROR("Couldn't open %s\n", file);
    return errno;
  }

  /* assume float count file */
  if (read(fd, (char *)&floatCount, sizeof(int32)) != sizeof(int32))
    return errno;

  if (fstat (fd, &st_buf) < 0) {
    perror("cep_read_bin: fstat failed");
    return errno;
  }

  /*  printf("Float count: %d st_buf.st_size: %d \n", floatCount, st_buf.st_size); */

  /*
   * Check if this is a byte reversed file !
   */
  if ((floatCount+4 != st_buf.st_size) &&
      ((floatCount * sizeof(float32) + 4) != st_buf.st_size)) {
	E_INFO("Byte reversing %s\n", file);
	byteReverse = TRUE;
	SWAP_INT32(&floatCount);
  }

  if (floatCount + 4 == st_buf.st_size) {
	floatBytes = floatCount;
	floatCount /= sizeof (float32);
  }
  else if (floatCount * sizeof(float32) + 4 == st_buf.st_size)
	floatBytes = floatCount * sizeof(float32);
  else {
      E_ERROR("Header count %d does not match file size %d\n",
	      floatCount, st_buf.st_size);
      return -1;
  }

  /* malloc size to account for possibility of being a float count file */
  if ((*buf = (float32 *)malloc(floatBytes)) == NULL)
    return errno;
  readBytes = read(fd, (char *)*buf, floatBytes);

  if (readBytes != floatBytes) {
    /* float count is actually a byte count of the file */
    return errno;
  }
  *len = readBytes;
  /*
   * Reorder the bytes if needed
   */
  cnt = readBytes >> 2;
  if (byteReverse) {
	uint32 *ptr = (uint32 *) *buf;
	for (i = 0; i < cnt; i++)
	    SWAP_INT32(&ptr[i]);
  }
  if (close(fd) != ESUCCESS) return errno;
  return ESUCCESS;
}  
Exemplo n.º 16
0
int
share_ctl_next_utts(uint32 *offset, uint32 *run)
{
    uint32 magic;
    uint32 i_offset;
    uint32 o_offset;
    uint32 ns;
    int ret;
    struct flock lck;

    lck.l_type = F_WRLCK;
    lck.l_whence = SEEK_SET;
    lck.l_start = 0;
    lck.l_len = 2 * sizeof(uint32);

    /* Get an exclusive lock on the file */
    if (fcntl(state_fd, F_SETLKW, &lck) < 0) {
	E_FATAL_SYSTEM("file lock failed");
    }

    printf("*** in crit sec ***\n");

    /* read the byte order magic number */
    ret = read(state_fd, &magic, sizeof(uint32));
    if (ret < 0)
	E_FATAL_SYSTEM("read failed");
    else if (ret < sizeof(uint32)) {
	E_FATAL("Expected to read %u bytes, but got %u instead\n",
		sizeof(uint32), ret);
    }
	       

    if (magic != BYTE_ORDER_MAGIC) {
	/* seems to need swap */

	SWAP_INT32(&magic);

	if (magic != BYTE_ORDER_MAGIC) {
	    E_FATAL("Couldn't create magic # by swapping\n");
	}

	ns = TRUE;
    }
    else
	ns = FALSE;


    ret = read(state_fd, &i_offset, sizeof(uint32));
    if (ret < 0)
	E_FATAL_SYSTEM("read failed");
    else if (ret < sizeof(uint32)) {
	E_FATAL("Expected to read %u bytes, but got %u instead\n",
		sizeof(uint32), ret);
    }
    
    if (ns)
	SWAP_INT32(&i_offset);

    o_offset = i_offset + blk_sz;

    printf("i_offset= %u\n", i_offset);

    sleep(30);

    lseek(state_fd, (off_t)0, SEEK_SET);

    if (write(state_fd, &magic, sizeof(uint32)) < 0)
	E_FATAL_SYSTEM("write failed");
    
    if (write(state_fd, &o_offset, sizeof(uint32)) < 0)
	E_FATAL_SYSTEM("write failed");
    
    lck.l_type = F_UNLCK;

    if (fcntl(state_fd, F_SETLK, &lck) < 0) {
	E_FATAL_SYSTEM("file unlock failed");
    }

    lseek(state_fd, (off_t)0, SEEK_SET);

    printf("*** out crit sec ***\n");

    *offset = i_offset;
    *run = blk_sz;

    return S3_SUCCESS;
}
Exemplo n.º 17
0
static int32
read_sendump(s2_semi_mgau_t *s, mdef_t *mdef, char const *file)
{
    FILE *fp;
    char line[1000];
    int32 i, n;
    int32 do_swap, do_mmap;
    size_t filesize, offset;
    int n_clust = 256;          /* Number of clusters (if zero, we are just using
                                 * 8-bit quantized weights) */
    int r = s->n_density;
    int c = mdef_n_sen(mdef);

    s->n_sen = c;
    do_mmap = cmd_ln_boolean_r(s->config, "-mmap");

    if ((fp = fopen(file, "rb")) == NULL)
        return -1;

    E_INFO("Loading senones from dump file %s\n", file);
    /* Read title size, title */
    fread(&n, sizeof(int32), 1, fp);
    /* This is extremely bogus */
    do_swap = 0;
    if (n < 1 || n > 999) {
        SWAP_INT32(&n);
        if (n < 1 || n > 999) {
            E_FATAL("Title length %x in dump file %s out of range\n", n, file);
        }
        do_swap = 1;
    }
    if (fread(line, sizeof(char), n, fp) != n)
        E_FATAL("Cannot read title\n");
    if (line[n - 1] != '\0')
        E_FATAL("Bad title in dump file\n");
    E_INFO("%s\n", line);

    /* Read header size, header */
    fread(&n, 1, sizeof(n), fp);
    if (do_swap) SWAP_INT32(&n);
    if (fread(line, sizeof(char), n, fp) != n)
        E_FATAL("Cannot read header\n");
    if (line[n - 1] != '\0')
        E_FATAL("Bad header in dump file\n");

    /* Read other header strings until string length = 0 */
    for (;;) {
        fread(&n, 1, sizeof(n), fp);
        if (do_swap) SWAP_INT32(&n);
        if (n == 0)
            break;
        if (fread(line, sizeof(char), n, fp) != n)
            E_FATAL("Cannot read header\n");
        /* Look for a cluster count, if present */
        if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
            n_clust = atoi(line + strlen("cluster_count "));
        }
    }

    /* Read #codewords, #pdfs */
    fread(&r, 1, sizeof(r), fp);
    if (do_swap) SWAP_INT32(&r);
    fread(&c, 1, sizeof(c), fp);
    if (do_swap) SWAP_INT32(&c);
    E_INFO("Rows: %d, Columns: %d\n", r, c);

    if (n_clust) {
	E_ERROR ("Dump file is incompatible with PocketSphinx\n");
	fclose(fp);
	return -1;
    }
    if (do_mmap) {
            E_INFO("Using memory-mapped I/O for senones\n");
    }
    offset = ftell(fp);
    fseek(fp, 0, SEEK_END);
    filesize = ftell(fp);
    fseek(fp, offset, SEEK_SET);

    /* Allocate memory for pdfs (or memory map them) */
    if (do_mmap)
        s->sendump_mmap = mmio_file_read(file);

    /* Otherwise, set up all pointers, etc. */
    if (s->sendump_mmap) {
        s->mixw = ckd_calloc(s->n_feat, sizeof(*s->mixw));
        for (i = 0; i < s->n_feat; i++) {
            /* Pointers into the mmap()ed 2d array */
	    s->mixw[i] = ckd_calloc(r, sizeof(**s->mixw));
        }

        for (n = 0; n < s->n_feat; n++) {
            for (i = 0; i < r; i++) {
                s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
                offset += c;
            }
        }
    }
    else {
        s->mixw = ckd_calloc_3d(s->n_feat, r, c, sizeof(***s->mixw));
        /* Read pdf values and ids */
        for (n = 0; n < s->n_feat; n++) {
            for (i = 0; i < r; i++) {
                if (fread(s->mixw[n][i], sizeof(***s->mixw), c, fp) != (size_t) c) {
                    E_ERROR("Failed to read %d bytes from sendump\n", c);
                    return -1;
                }
            }
        }
    }

    fclose(fp);
    return 0;
}
Exemplo n.º 18
0
main (int argc, char *argv[])
{
    struct stat statbuf;
    char *file;
    FILE *fp;
    int32 byterev;
    int32 i, n, n_float32, fr, numbered, sd, ed, smooth;
    float32 cep[13], cep_1[13], cep_2[13], c;	/* Hack!! Hardwired 13 */
    
    if (argc < 2)
	usagemsg (argv[0]);

    numbered = 0;
    file = NULL;
    sd = -1;	/* Start and end dimensions to output */
    ed = -1;
    smooth = 0;
    for (i = 1; i < argc; i++) {
	if (argv[i][0] == '-') {
	    switch (argv[i][1]) {
	    case 'n':
		if (numbered)
		    usagemsg (argv[0]);
		numbered = 1;
		break;
	    case 'l':
		if (smooth)
		    usagemsg (argv[0]);
		smooth = 1;
		break;
	    case 's':
		if ((sd >= 0) || (sscanf (&(argv[i][2]), "%d", &sd) != 1) ||
		    (sd < 0) || (sd > 12))
		    usagemsg (argv[0]);
		break;
	    case 'e':
		if ((ed >= 0) || (sscanf (&(argv[i][2]), "%d", &ed) != 1) ||
		    (ed < 0) || (ed > 12))
		    usagemsg (argv[0]);
		break;
	    default:
		usagemsg (argv[0]);
		break;
	    }
	} else {
	    if (file)
		usagemsg (argv[0]);
	    file = argv[i];
	}
    }
    if (! file)
	usagemsg (argv[0]);
    if (sd < 0)
	sd = 0;
    if (ed < 0)
	ed = 12;
    
    if (stat (file, &statbuf) != 0) {
	E_ERROR("stat(%s) failed\n", file);
	return -1;
    }
    
    if ((fp = fopen(file, "rb")) == NULL) {
	E_ERROR("fopen(%s,rb) failed\n", file);
	return -1;
    }
    
    /* Read #floats in header */
    if (fread_retry (&n_float32, sizeof(int32), 1, fp) != 1) {
	fclose (fp);
	return -1;
    }
    
    /* Check of n_float32 matches file size */
    byterev = FALSE;
    if ((n_float32*sizeof(float32) + 4) != statbuf.st_size) {
	n = n_float32;
	SWAP_INT32(&n);

	if ((n*sizeof(float32) + 4) != statbuf.st_size) {
	    E_ERROR("Header size field: %d(%08x); filesize: %d(%08x)\n",
		    n_float32, n_float32, statbuf.st_size, statbuf.st_size);
	    fclose (fp);
	    return -1;
	}

	n_float32 = n;
	byterev = TRUE;
    }
    if (n_float32 <= 0) {
	E_ERROR("Header size field: %d\n",  n_float32);
	fclose (fp);
	return -1;
    }
    if (byterev)
	E_INFO("Byte-reversing %s\n", file);
    
    E_INFO("Dimensions %d..%d%s\n", sd, ed, (smooth ? " (LPFed)" : ""));

    fr = 0;
    while (fread (cep, sizeof(float32), 13, fp) == 13) {
	if (byterev) {
	    for (i = 0; i < 13; i++) {
		SWAP_FLOAT32(cep+i);
	    }
	}
	
	if (smooth) {
	    /* Smoothing (LPF-ing): 0.25, 0.5, 0.25; except at the ends: 0.5, 0.5 */
	    if ((fr > 0) && numbered)
		printf ("%10d ", fr-1);
	    
	    if (fr == 1) {
		for (i = sd; i <= ed; i++) {
		    c = 0.5 * cep_1[i] + 0.5 * cep[i];
		    printf (" %11.7f", c);
		}
	    } else if (fr > 1) {
		for (i = sd; i <= ed; i++) {
		    c = 0.25 * cep_2[i] + 0.5 * cep_1[i] + 0.25 * cep[i];
		    printf (" %11.7f", c);
		}
	    }

	    memcpy (cep_2, cep_1, sizeof(float32) * 13);
	    memcpy (cep_1, cep, sizeof(float32) * 13);
	} else {
	    if (numbered)
		printf ("%10d ", fr);
	    
	    for (i = sd; i <= ed; i++) {
		printf (" %11.7f", cep[i]);
	    }
	}
	
	if ((! smooth) || (fr > 0))
	    printf ("\n");
	fflush (stdout);
	
	fr++;
    }
    
    if (smooth && (fr > 0)) {
	if (numbered)
	    printf ("%10d ", fr-1);
	
	if (fr > 1) {
	    for (i = sd; i <= ed; i++) {
		c = 0.5 * cep_2[i] + 0.5 * cep_1[i];
		printf (" %11.7f", c);
	    }
	} else if (fr == 1) {
	    for (i = sd; i <= ed; i++)
		printf (" %11.7f", cep_1[i]);
	}

	printf ("\n");
	fflush (stdout);
    }
}
/*
 * Write state segmentation in Sphinx-II format.  (Must be written in BIG-ENDIAN
 * format!)
 */
static void write_s2stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec)
{
    char filename[1024];
    FILE *fp;
    align_stseg_t *tmp;
    int32 k;
    s3cipid_t ci[3];
    word_posn_t wpos;
    int16 s2_info;
    char buf[8];
    static int32 byterev = -1;	/* Whether to byte reverse output data */
    
    build_output_uttfile (filename, dir, uttid, ctlspec);
    strcat (filename, ".v8_seg");		/* .v8_seg for compatibility */
    E_INFO("Writing Sphinx-II format state segmentation to: %s\n", filename);
    if ((fp = fopen (filename, "wb")) == NULL) {
	E_ERROR("fopen(%s,wb) failed\n", filename);
	return;
    }

    if (byterev < 0) {
	/* Byte ordering of host machine unknown; first figure it out */
	k = (int32) BYTE_ORDER_MAGIC;
	if (fwrite (&k, sizeof(int32), 1, fp) != 1)
	    goto write_error;

	fclose (fp);
	if ((fp = fopen (filename, "rb")) == NULL) {
	    E_ERROR ("fopen(%s,rb) failed\n", filename);
	    return;
	}
	if (fread (buf, 1, sizeof(int32), fp) != sizeof(int32)) {
	    E_ERROR ("fread(%s) failed\n", filename);
	    return;
	}
	fclose (fp);
	
	/* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian.  Need to byterev */
	byterev = (buf[0] == (BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0;

	if ((fp = fopen (filename, "wb")) == NULL) {
	    E_ERROR("fopen(%s,wb) failed\n", filename);
	    return;
	}
    }
    
    /* Write #frames */
    for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next);
    if (byterev)
	SWAP_INT32(&k);
    if (fwrite (&k, sizeof(int32), 1, fp) != 1)
	goto write_error;
    
    /* Write state info for each frame */
    for (; stseg; stseg = stseg->next) {
	mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos);

	s2_info = ci[0] * mdef->n_emit_state + stseg->state;
	if (stseg->start)
	    s2_info |= 0x8000;
	if (byterev)
	    SWAP_INT16(&s2_info);
	
	if (fwrite (&s2_info, sizeof(int16), 1, fp) != 1)
	    goto write_error;
    }
    
    fclose (fp);
    return;
    
write_error:
    E_ERROR("fwrite(%s) failed\n", filename);
    fclose (fp);
}
Exemplo n.º 20
0
bin_mdef_t *
bin_mdef_read(cmd_ln_t *config, const char *filename)
{
    bin_mdef_t *m;
    FILE *fh;
    size_t tree_start;
    int32 val, i, swap, pos, end;
    int32 *sseq_size;
    int do_mmap;

    /* Try to read it as text first. */
    if ((m = bin_mdef_read_text(config, filename)) != NULL)
        return m;

    E_INFO("Reading binary model definition: %s\n", filename);
    if ((fh = fopen(filename, "rb")) == NULL)
        return NULL;

    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
                       filename);
        return NULL;
    }
    swap = 0;
    if (val == BIN_MDEF_OTHER_ENDIAN) {
        swap = 1;
        E_INFO("Must byte-swap %s\n", filename);
    }
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    if (val > BIN_MDEF_FORMAT_VERSION) {
        E_ERROR("File format version %d for %s is newer than library\n",
                val, filename);
        fclose(fh);
        return NULL;
    }
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    /* Skip format descriptor. */
    fseek(fh, val, SEEK_CUR);

    /* Finally allocate it. */
    m = ckd_calloc(1, sizeof(*m));
    m->refcnt = 1;

    /* Check these, to make gcc/glibc shut up. */
#define FREAD_SWAP32_CHK(dest)                                          \
    if (fread((dest), 4, 1, fh) != 1) {                                 \
        fclose(fh);                                                     \
        ckd_free(m);                                                    \
        E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
        return NULL;                                                    \
    }                                                                   \
    if (swap) SWAP_INT32(dest);
    
    FREAD_SWAP32_CHK(&m->n_ciphone);
    FREAD_SWAP32_CHK(&m->n_phone);
    FREAD_SWAP32_CHK(&m->n_emit_state);
    FREAD_SWAP32_CHK(&m->n_ci_sen);
    FREAD_SWAP32_CHK(&m->n_sen);
    FREAD_SWAP32_CHK(&m->n_tmat);
    FREAD_SWAP32_CHK(&m->n_sseq);
    FREAD_SWAP32_CHK(&m->n_ctx);
    FREAD_SWAP32_CHK(&m->n_cd_tree);
    FREAD_SWAP32_CHK(&m->sil);

    /* CI names are first in the file. */
    m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));

    /* Decide whether to read in the whole file or mmap it. */
    do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
    if (swap) {
        E_WARN("-mmap specified, but mdef is other-endian.  Will not memory-map.\n");
        do_mmap = FALSE;
    } 
    /* Actually try to mmap it. */
    if (do_mmap) {
        m->filemap = mmio_file_read(filename);
        if (m->filemap == NULL)
            do_mmap = FALSE;
    }
    pos = ftell(fh);
    if (do_mmap) {
        /* Get the base pointer from the memory map. */
        m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
        /* Success! */
        m->alloc_mode = BIN_MDEF_ON_DISK;
    }
    else {
        /* Read everything into memory. */
        m->alloc_mode = BIN_MDEF_IN_MEMORY;
        fseek(fh, 0, SEEK_END);
        end = ftell(fh);
        fseek(fh, pos, SEEK_SET);
        m->ciname[0] = ckd_malloc(end - pos);
        if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
            E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename);
    }

    for (i = 1; i < m->n_ciphone; ++i)
        m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;

    /* Skip past the padding. */
    tree_start =
        m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
    tree_start = (tree_start + 3) & ~3;
    m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
    if (swap) {
        for (i = 0; i < m->n_cd_tree; ++i) {
            SWAP_INT16(&m->cd_tree[i].ctx);
            SWAP_INT16(&m->cd_tree[i].n_down);
            SWAP_INT32(&m->cd_tree[i].c.down);
        }
    }
    m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
    if (swap) {
        for (i = 0; i < m->n_phone; ++i) {
            SWAP_INT32(&m->phone[i].ssid);
            SWAP_INT32(&m->phone[i].tmat);
        }
    }
    sseq_size = (int32 *) (m->phone + m->n_phone);
    if (swap)
        SWAP_INT32(sseq_size);
    m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
    m->sseq[0] = (uint16 *) (sseq_size + 1);
    if (swap) {
        for (i = 0; i < *sseq_size; ++i)
            SWAP_INT16(m->sseq[0] + i);
    }
    if (m->n_emit_state) {
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
    }
    else {
        m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
    }

    /* Now build the CD-to-CI mappings using the senone sequences.
     * This is the only really accurate way to do it, though it is
     * still inaccurate in the case of heterogeneous topologies or
     * cross-state tying. */
    m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
    m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));

    /* Default mappings (identity, none) */
    for (i = 0; i < m->n_ci_sen; ++i)
        m->cd2cisen[i] = i;
    for (; i < m->n_sen; ++i)
        m->cd2cisen[i] = -1;
    for (i = 0; i < m->n_sen; ++i)
        m->sen2cimap[i] = -1;
    for (i = 0; i < m->n_phone; ++i) {
        int32 j, ssid = m->phone[i].ssid;

        for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
            int s = bin_mdef_sseq2sen(m, ssid, j);
            int ci = bin_mdef_pid2ci(m, i);
            /* Take the first one and warn if we have cross-state tying. */
            if (m->sen2cimap[s] == -1)
                m->sen2cimap[s] = ci;
            if (m->sen2cimap[s] != ci)
                E_WARN
                    ("Senone %d is shared between multiple base phones\n",
                     s);

            if (j > bin_mdef_n_emit_state_phone(m, ci))
                E_WARN("CD phone %d has fewer states than CI phone %d\n",
                       i, ci);
            else
                m->cd2cisen[s] =
                    bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
        }
    }

    /* Set the silence phone. */
    m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE);

    E_INFO
        ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
         m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
         m->n_ci_sen, m->n_sen, m->n_sseq);
    fclose(fh);
    return m;
}
Exemplo n.º 21
0
ngram_model_t *
ngram_model_trie_read_dmp(cmd_ln_t * config,
                          const char *file_name, logmath_t * lmath)
{
    uint8 do_swap;
    int32 is_pipe;
    int32 k;
    uint32 j;
    int32 vn, ts;
    int32 count;
    uint32 counts[3];
    uint32 fixed_counts[3];
    uint32 *unigram_next;
    int i, order;
    char str[1024];
    FILE *fp;
    ngram_model_trie_t *model;
    ngram_model_t *base;
    ngram_raw_t **raw_ngrams;

    E_INFO("Trying to read LM in DMP format\n");
    if ((fp = fopen_comp(file_name, "rb", &is_pipe)) == NULL) {
        E_ERROR("Dump file %s not found\n", file_name);
        return NULL;
    }

    do_swap = FALSE;
    fread(&k, sizeof(k), 1, fp);
    if (k != strlen(dmp_hdr) + 1) {
        SWAP_INT32(&k);
        if (k != strlen(dmp_hdr) + 1) {
            E_ERROR
                ("Wrong magic header size number %x: %s is not a dump file\n",
                 k, file_name);
            return NULL;
        }
        do_swap = 1;
    }
    if (fread(str, 1, k, fp) != (size_t) k) {
        E_ERROR("Cannot read header\n");
        return NULL;
    }
    if (strncmp(str, dmp_hdr, k) != 0) {
        E_ERROR("Wrong header %s: %s is not a dump file\n", dmp_hdr);
        return NULL;
    }

    if (fread(&k, sizeof(k), 1, fp) != 1)
        return NULL;
    if (do_swap)
        SWAP_INT32(&k);
    if (fread(str, 1, k, fp) != (size_t) k) {
        E_ERROR("Cannot read LM filename in header\n");
        return NULL;
    }

    /* read version#, if present (must be <= 0) */
    if (fread(&vn, sizeof(vn), 1, fp) != 1)
        return NULL;
    if (do_swap)
        SWAP_INT32(&vn);
    if (vn <= 0) {
        /* read and don't compare timestamps (we don't care) */
        if (fread(&ts, sizeof(ts), 1, fp) != 1)
            return NULL;
        if (do_swap)
            SWAP_INT32(&ts);

        /* read and skip format description */
        for (;;) {
            if (fread(&k, sizeof(k), 1, fp) != 1)
                return NULL;
            if (do_swap)
                SWAP_INT32(&k);
            if (k == 0)
                break;
            if (fread(str, 1, k, fp) != (size_t) k) {
                E_ERROR("Failed to read word\n");
                return NULL;
            }
        }
        /* read model->ucount */
        if (fread(&count, sizeof(count), 1, fp) != 1)
            return NULL;
        if (do_swap)
            SWAP_INT32(&count);
        counts[0] = count;
    }
    else {
        counts[0] = vn;
    }
    /* read model->bcount, tcount */
    if (fread(&count, sizeof(count), 1, fp) != 1)
        return NULL;
    if (do_swap)
        SWAP_INT32(&count);
    counts[1] = count;
    if (fread(&count, sizeof(count), 1, fp) != 1)
        return NULL;
    if (do_swap)
        SWAP_INT32(&count);
    counts[2] = count;
    E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", counts[0], counts[1], counts[2]);

    model = (ngram_model_trie_t *) ckd_calloc(1, sizeof(*model));
    base = &model->base;
    if (counts[2] > 0)
        order = 3;
    else if (counts[1] > 0)
        order = 2;
    else
        order = 1;
    ngram_model_init(base, &ngram_model_trie_funcs, lmath, order,
                     (int32) counts[0]);

    model->trie = lm_trie_create(counts[0], order);

    unigram_next =
        (uint32 *) ckd_calloc((int32) counts[0] + 1, sizeof(unigram_next));
    for (j = 0; j <= (int32) counts[0]; j++) {
        int32 bigrams;
        dmp_weight_t weight;
        /* Skip over the mapping ID, we don't care about it. */
        fread(&bigrams, sizeof(int32), 1, fp);
        /* Read the weights from actual unigram structure. */
        fread(&weight, sizeof(weight), 1, fp);
        if (do_swap)
            SWAP_INT32(&weight.l);
        weight.f = logmath_log10_to_log_float(lmath, weight.f);
        model->trie->unigrams[j].prob = weight.f;
        fread(&weight, sizeof(weight), 1, fp);
        if (do_swap)
            SWAP_INT32(&weight.l);
        weight.f = logmath_log10_to_log_float(lmath, weight.f);
        model->trie->unigrams[j].bo = weight.f;
        //store pointer to dmp next to recognize wid
        fread(&bigrams, sizeof(int32), 1, fp);
        if (do_swap)
            SWAP_INT32(&bigrams);
        model->trie->unigrams[j].next = bigrams;
        unigram_next[j] = bigrams;
    }

    if (order > 1) {
        raw_ngrams =
            ngrams_raw_read_dmp(fp, lmath, counts, order, unigram_next,
                                do_swap);
        ngrams_raw_fix_counts(raw_ngrams, counts, fixed_counts, order);
        for (i = 0; i < order; i++) {
            base->n_counts[i] = fixed_counts[i];
        }

        //build reversed trie
        lm_trie_alloc_ngram(model->trie, order > 2 ? fixed_counts : counts,
                            order);
        lm_trie_build(model->trie, raw_ngrams, counts, order);
        counts[1]++;

        //free raw ngrams
        ngrams_raw_free(raw_ngrams, counts, order);
    }
    ckd_free(unigram_next);

    /* read ascii word strings */
    read_word_str(base, fp);

    fclose_comp(fp, is_pipe);
    return base;
}
static int
read_riff_header(FILE *infh)
{
    char id[4];
    int32 intval, header_len;
    int16 shortval;

    /* RIFF files are little-endian by definition. */
    cmd_ln_set_str("-input_endian", "little");

    /* Read in all the header chunks and etcetera. */
    TRY_FREAD(id, 1, 4, infh);
    /* Total file length (we don't care) */
    TRY_FREAD(&intval, 4, 1, infh);
    /* 'WAVE' */
    TRY_FREAD(id, 1, 4, infh);
    if (0 != memcmp(id, "WAVE", 4)) {
        E_ERROR("This is not a WAVE file\n");
        goto error_out;
    }
    /* 'fmt ' */
    TRY_FREAD(id, 1, 4, infh);
    if (0 != memcmp(id, "fmt ", 4)) {
        E_ERROR("Format chunk missing\n");
        goto error_out;
    }
    /* Length of 'fmt ' chunk */
    TRY_FREAD(&intval, 4, 1, infh);
    SWAP_INT32(&intval);
    header_len = intval;

    /* Data format. */
    TRY_FREAD(&shortval, 2, 1, infh);
    SWAP_INT16(&shortval);
    if (shortval != 1) { /* PCM */
        E_ERROR("WAVE file is not in PCM format\n");
        goto error_out;
    }

    /* Number of channels. */
    TRY_FREAD(&shortval, 2, 1, infh);
    SWAP_INT16(&shortval);
    if (shortval != 1) { /* PCM */
        E_ERROR("WAVE file is not single channel\n");
        goto error_out;
    }

    /* Sampling rate (finally!) */
    TRY_FREAD(&intval, 4, 1, infh);
    SWAP_INT32(&intval);
    if (cmd_ln_int32("-samprate") == 0)
        cmd_ln_set_int32("-samprate", intval);
    else if (cmd_ln_int32("-samprate") != intval) {
        E_WARN("WAVE file sampling rate %d != -samprate %d\n",
               intval, cmd_ln_int32("-samprate"));
    }

    /* Average bytes per second (we don't care) */
    TRY_FREAD(&intval, 4, 1, infh);

    /* Block alignment (we don't care) */
    TRY_FREAD(&shortval, 2, 1, infh);

    /* Bits per sample (must be 16) */
    TRY_FREAD(&shortval, 2, 1, infh);
    SWAP_INT16(&shortval);
    if (shortval != 16) {
        E_ERROR("WAVE file is not 16-bit\n");
        goto error_out;
    }

    /* Any extra parameters. */
    if (header_len > 16)
        fseek(infh, header_len - 16, SEEK_CUR);

    /* Now skip to the 'data' chunk. */
    while (1) {
        TRY_FREAD(id, 1, 4, infh);
        if (0 == memcmp(id, "data", 4)) {
            /* Total number of bytes of data (we don't care). */
            TRY_FREAD(&intval, 4, 1, infh);
            break;
        }
        else {
            /* Some other stuff... */
            /* Number of bytes of ... whatever */
            TRY_FREAD(&intval, 4, 1, infh);
            SWAP_INT32(&intval);
            fseek(infh, intval, SEEK_CUR);
        }
    }

    /* We are ready to rumble. */
    return 0;
error_out:
    return -1;
}
Exemplo n.º 23
0
/**
 * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data).
 * If out_mfc is 0, no actual reading will be done, and the number of 
 * frames (plus padding) that would be read is returned.
 * 
 * It's important that normalization is done before padding because
 * frames outside the data we are interested in shouldn't be taken
 * into normalization stats.
 *
 * @return # frames read (plus padding) if successful, -1 if
 * error (e.g., mfc array too small).  
 */
static int32
feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
            		 int32 sf, int32 ef,
            		 mfcc_t ***out_mfc,
            		 int32 maxfr,
            		 int32 cepsize)
{
    FILE *fp;
    int32 n_float32;
    float32 *float_feat;
    struct stat statbuf;
    int32 i, n, byterev;
    int32 start_pad, end_pad;
    mfcc_t **mfc;

    /* Initialize the output pointer to 0, so that any attempts to
       free() it if we fail before allocating it will not segfault! */
    if (out_mfc)
        *out_mfc = 0;
    E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
    if (ef >= 0 && ef <= sf) {
        E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
        return -1;
    }

    /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
    if ((stat_retry(file, &statbuf) < 0)
        || ((fp = fopen(file, "rb")) == 0)) {
#ifndef POCKETSPHINX_NET
         E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno));
#endif
        return -1;
    }

    /* Read #floats in header */
    if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
        E_ERROR("%s: fread(#floats) failed\n", file);
        fclose(fp);
        return -1;
    }

    /* Check if n_float32 matches file size */
    byterev = 0;
    if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
        n = n_float32;
        SWAP_INT32(&n);

        if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) {   /* RAH, typecast both sides to remove compile warning */
            E_ERROR
                ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
                 file, n_float32, n_float32, statbuf.st_size,
                 statbuf.st_size);
            fclose(fp);
            return -1;
        }

        n_float32 = n;
        byterev = 1;
    }
    if (n_float32 <= 0) {
        E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
        fclose(fp);
        return -1;
    }

    /* Convert n to #frames of input */
    n = n_float32 / cepsize;
    if (n * cepsize != n_float32) {
        E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
                cepsize);
        fclose(fp);
        return -1;
    }

    /* Check start and end frames */
    if (sf > 0) {
        if (sf >= n) {
            E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
                    sf, n);
            fclose(fp);
            return -1;
        }
    }
    if (ef < 0)
        ef = n-1;
    else if (ef >= n) {
        E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
               file, ef, n);
        ef = n-1;
    }

    /* Add window to start and end frames */
    sf -= win;
    ef += win;
    if (sf < 0) {
        start_pad = -sf;
        sf = 0;
    }
    else
        start_pad = 0;
    if (ef >= n) {
        end_pad = ef - n + 1;
        ef = n - 1;
    }
    else
        end_pad = 0;

    /* Limit n if indicated by [sf..ef] */
    if ((ef - sf + 1) < n)
        n = (ef - sf + 1);
    if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
        E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
                file, maxfr, n + start_pad + end_pad);
        fclose(fp);
        return -1;
    }

    /* If no output buffer was supplied, then skip the actual data reading. */
    if (out_mfc != 0) {
        /* Position at desired start frame and read actual MFC data */
        mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
        if (sf > 0)
            fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
        n_float32 = n * cepsize;
#ifdef FIXED_POINT
        float_feat = ckd_calloc(n_float32, sizeof(float32));
#else
        float_feat = mfc[start_pad];
#endif
        if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
            E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
            ckd_free_2d(mfc);
            fclose(fp);
            return -1;
        }
        if (byterev) {
            for (i = 0; i < n_float32; i++) {
                SWAP_FLOAT32(&float_feat[i]);
            }
        }
#ifdef FIXED_POINT
        for (i = 0; i < n_float32; ++i) {
            mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
        }
        ckd_free(float_feat);
#endif

        /* Normalize */
        feat_cmn(fcb, mfc + start_pad, n, 1, 1);
        feat_agc(fcb, mfc + start_pad, n, 1, 1);

        /* Replicate start and end frames if necessary. */
        for (i = 0; i < start_pad; ++i)
            memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
        for (i = 0; i < end_pad; ++i)
            memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
                   cepsize * sizeof(mfcc_t));

        *out_mfc = mfc;
    }

    fclose(fp);
    return n + start_pad + end_pad;
}
Exemplo n.º 24
0
Arquivo: vq.c Projeto: 10v/cmusphinx
static int32 gauden_param_read(float32 *****out_param,
			       int32 *out_n_mgau,
			       int32 *out_n_feat,
			       int32 *out_n_density,
			       int32 **out_veclen,
			       const char *file_name)
{
    char version[1024], tmp;
    FILE *fp;
    int32 i, j, k, l, blk, n;
    int32 n_mgau;
    int32 n_feat;
    int32 n_density;
    int32 *veclen;
    int32 needs_reorder;
    float32 ****out;
    float32 *buf;

    E_INFO("Reading mixture gaussian parameter: %s\n", file_name);
    
    if ((fp = fopen(file_name, "rb")) == NULL)
	E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    if (fscanf(fp, "%s", version) != 1)
	E_FATAL("Unable to read version id\n");
    
    if (strcmp(version, GAUDEN_PARAM_VERSION) != 0)
	E_FATAL("Version mismatch: %s, expecting %s\n", version, GAUDEN_PARAM_VERSION);
    
    if (bcomment_read(fp) != S3_SUCCESS)
	E_FATAL("bcomment_read() failed\n");
    
    if ((needs_reorder = swap_check(fp)) < 0)
	E_FATAL("swap_check() failed\n");

    /* #Codebooks */
    if (fread_retry(&n_mgau, sizeof(uint32), 1, fp) != 1)
	E_FATAL("Error reading #codebooks\n");
    if (needs_reorder) {
	SWAP_INT32(&n_mgau);
    }
    *out_n_mgau = n_mgau;

    /* #Features/codebook */
    if (fread_retry(&n_feat, sizeof(uint32), 1, fp) != 1)
	E_FATAL("Error reading #features/codebook\n");
    if (needs_reorder) {
	SWAP_INT32(&n_feat);
    }
    *out_n_feat = n_feat;

    /* #Gaussian densities/feature in each codebook */
    if (fread_retry(&n_density, sizeof(uint32), 1, fp) != 1)
	E_FATAL("Error reading #densities/codebook-feature\n");
    if (needs_reorder) {
	SWAP_INT32(&n_density);
    }
    *out_n_density = n_density;
    
    /* #Dimensions in each feature stream */
    veclen = ckd_calloc(n_feat, sizeof(uint32));
    *out_veclen = veclen;
    if (fread_retry(veclen, sizeof(uint32), n_feat, fp) != n_feat)
	E_FATAL("Error reading feature vector lengths\n");
    if (needs_reorder) {
	for (i = 0; i < n_feat; i++)
	    SWAP_INT32(&veclen[i]);
    }

    /* blk = total vector length of all feature streams */
    for (i = 0, blk = 0; i < n_feat; i++)
	blk += veclen[i];

    /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
    if (fread_retry(&n, sizeof(uint32), 1, fp) != 1)
	E_FATAL("Error reading #floats\n");
    if (needs_reorder) {
	SWAP_INT32(&n);
    }
    assert(n == n_mgau * n_density * blk);
    
    /* Allocate memory for mixture gaussian densities */
    out = (float32 ****) ckd_calloc_3d (n_mgau, n_feat, n_density,
					sizeof(float32 *));
    buf = (float32 *) ckd_calloc (n, sizeof(float));
    for (i = 0, l = 0; i < n_mgau; i++) {
	for (j = 0; j < n_feat; j++) {
	    for (k = 0; k < n_density; k++) {
		out[i][j][k] = &buf[l];

		l += veclen[j];
	    }
	}
    }

    /* Read mixture gaussian densities data */
    if (fread_retry (buf, sizeof(float32), n, fp) != n)
	E_FATAL("Error reading gaussian data\n");
    if (needs_reorder)
	for (i = 0; i < n; i++)
	    SWAP_FLOAT32(&buf[i]);
    
    E_INFO("%d codebook, %d feature, size",
	   n_mgau, n_feat);
    for (i = 0; i < n_feat; i++)
	printf (" %dx%d", n_density, veclen[i]);
    printf ("\n");

    if (fread (&tmp, 1, 1, fp) == 1)
	E_WARN("Non-empty file beyond end of data\n");

    *out_param = out;
    
    fclose(fp);

    return 0;
}