Ejemplo n.º 1
0
Archivo: bitio.c Proyecto: cran/rcqp
/**
 * Reads bit data from a file into an unsigned int.
 *
 * The nbits bits are consumed from the stream in two stages: first the
 * partial group of (nbits % 8) bits, if any, then (nbits / 8) whole bytes.
 * The groups are combined most-significant-first, i.e. each later byte is
 * appended below the bits read before it. All bits of *data above the ones
 * that were read are zero.
 *
 * This matches the value the earlier implementation produced by filling a
 * 4-byte buffer in network byte order and converting it with ntohl(), but it
 * no longer depends on the previous content of *data, on the host byte order,
 * or on ints being exactly 4 bytes long (unsigned int must still be able to
 * hold nbits bits).
 *
 * *data is only written when all reads succeeded.
 *
 * @param data    Pointer to the location for the read bit data.
 * @param nbits   Number of bits to read (0 to 32 inclusive).
 * @param stream  The BFile buffer to use.
 * @return        Boolean: 1 for all OK, 0 for a problem (nbits out of range
 *                or a failed BFread()).
 */
int
BFreadWord(unsigned int *data, int nbits, BFile *stream)
{
  int bytes, rest, i;
  unsigned int value;
  unsigned char chunk;

  if ((nbits > 32) || (nbits < 0)) {
   Rprintf( "bitio.o/BFreadWord: nbits (%d) not in legal bounds\n", nbits);
    return 0;
  }

  bytes = nbits / 8;
  rest  = nbits % 8;
  value = 0;

  /* leading partial group: becomes the most significant part of the result */
  if (rest) {
    chunk = 0;                  /* do not rely on BFread() clearing the target byte */
    if (!BFread(&chunk, rest, stream))
      return 0;
    value = chunk;
  }

  /* whole bytes, most significant first */
  for (i = 0; i < bytes; i++) {
    chunk = 0;
    if (!BFread(&chunk, 8, stream))
      return 0;
    value = (value << 8) | chunk;
  }

  *data = value;

  return 1;
}
Ejemplo n.º 2
0
/**
 * Reads one Golomb-coded value with parameter b from a bit stream.
 *
 * The code consists of a unary part (a run of 1 bits terminated by a 0 bit,
 * whose length is the quotient q) followed by a binary remainder r. The
 * remainder is read as lb = ceil(log2(b)) - 1 bits; if the value read so far
 * is not below nr_sc = 2^ceil(log2(b)) - b, one additional bit is read and
 * nr_sc is subtracted. The decoded value is r + q * b.
 *
 * @param b   The Golomb parameter; must be at least 1.
 * @param bf  The BFile buffer to read from.
 * @return    The decoded value, or -1 if b is smaller than 1 or if reading
 *            from the stream failed (previously the result of a failed read
 *            was used unchecked).
 */
int read_golomb_code_am(int b, BFile *bf)
{
  int q, i, nr_sc, lb, ub;

  unsigned int r;
  unsigned char bit;

  double ldb;

  /* log2() of a non-positive number cannot be turned into a bit count */
  if (b < 1)
    return -1;

  ldb = log2(b * 1.0);
  ub = nint(ceil(ldb));
  lb = ub - 1;

  /* read unary part */

  q = 0;
  do {
    /* stop on a failed read instead of testing a stale or unset bit,
       which could otherwise keep this loop running forever */
    if (!BFread(&bit, 1, bf))
      return -1;
    if (bit)
      q++;
  } while (bit);

  nr_sc = (1 << ub) - b;
  
  /* read binary part, bitwise */

  r = 0;
  for (i = 0; i < lb; i++) {
    r <<= 1;
    if (!BFread(&bit, 1, bf))
      return -1;
    r |= bit;
  }

  if (debug_cwb_compress_rdx)
    fprintf(debug_output, "%8d:  Read r=%5d [%3d/%3d]  #sc=%4d, ",
            codepos, r, lb, ub, nr_sc);

  /* NOTE(review): for b == 1 nr_sc is 0, so this branch is always taken and
     one bit is consumed -- confirm that the encoder writes that bit. */
  if (r >= nr_sc) {
    r <<= 1;
    if (!BFread(&bit, 1, bf))
      return -1;
    r |= bit;
    r -= nr_sc;
  }

  if (debug_cwb_compress_rdx)
    fprintf(debug_output, "final r=%d\tgap=%d\n", 
            r, r+q*b);

  return r + q * b;
}
Ejemplo n.º 3
0
/**
 * Builds a file name from a prefix and a suffix in a fixed-size buffer.
 *
 * @param buf      Destination buffer.
 * @param bufsize  Size of the destination buffer in bytes.
 * @param prefix   First part of the name; NULL is treated as a failure.
 * @param suffix   Text appended to the prefix (may be the empty string).
 * @return         Boolean: 1 if the complete name fit into buf, 0 otherwise.
 */
static int
huff_build_path(char *buf, size_t bufsize, const char *prefix, const char *suffix)
{
  int n;

  if (prefix == NULL)
    return 0;

  n = snprintf(buf, bufsize, "%s%s", prefix, suffix);
  return (n >= 0) && ((size_t)n < bufsize);
}

/**
 * Checks a huffcoded attribute for errors by decompressing it.
 *
 * This function assumes that compute_code_lengths() has been called
 * beforehand and made sure that the _uncompressed_ token sequence is
 * used by CL access functions.
 *
 * The code descriptor, the compressed item sequence and the sync file are
 * opened, and every corpus position is decoded and compared with the id
 * returned by cl_cpos2id(). At every multiple of SYNCHRONIZATION the current
 * bit-file position is additionally compared with the offset stored in the
 * sync file.
 *
 * On any error (unreadable file, wrong corpus size, wrong sync offset, read
 * failure, or a decoded token that differs from the true token) a message is
 * printed, all files opened so far are closed, rcqp_receive_error(1) is
 * called and the function returns without printing the final "you can delete"
 * message. Whether rcqp_receive_error() itself returns is not visible here;
 * the function returns right after it in either case.
 *
 * @param attr  The attribute to check.
 * @param fname Base filename to use for the three compressed-attribute files.
 *              Can be NULL, in which case the filenames in the attribute are used.
 */
void 
decode_check_huff(Attribute *attr, char *fname)
{
  BFile bfd;
  FILE *sync = NULL;
  HCD hc;

  int bfd_is_open = 0;          /* set once BFopen() succeeded, so cleanup knows what to close */
  int paths_ok;

  int pos, size, sync_offset, offset;

  int l, v;
  int item, true_item;
  
  unsigned char bit;

  char hcd_path[CL_MAX_LINE_LENGTH];
  char huf_path[CL_MAX_LINE_LENGTH];
  char sync_path[CL_MAX_LINE_LENGTH];

  
  Rprintf("VALIDATING %s.%s\n", corpus_id_cwb_huffcode, attr->any.name);

  if (fname) {
    paths_ok = huff_build_path(hcd_path, sizeof hcd_path, fname, ".hcd")
      && huff_build_path(huf_path, sizeof huf_path, fname, ".huf")
      && huff_build_path(sync_path, sizeof sync_path, fname, ".huf.syn");
  }
  else {

    char *path;

    /* each name is copied right after it is obtained, as the original code did,
       in case component_full_name() reuses its result buffer */
    path = component_full_name(attr, CompHuffSeq, NULL);
    assert(path && (cderrno == CDA_OK));
    paths_ok = huff_build_path(huf_path, sizeof huf_path, path, "");
    
    path = component_full_name(attr, CompHuffCodes, NULL);
    assert(path && (cderrno == CDA_OK));
    paths_ok = huff_build_path(hcd_path, sizeof hcd_path, path, "") && paths_ok;

    path = component_full_name(attr, CompHuffSync, NULL);
    assert(path && (cderrno == CDA_OK));
    paths_ok = huff_build_path(sync_path, sizeof sync_path, path, "") && paths_ok;
    
  }

  if (!paths_ok) {
    Rprintf( "ERROR: can't construct file names for the compressed attribute. Aborted.\n");
    goto fail;
  }

  Rprintf("- reading code descriptor block from %s\n", hcd_path);
  if (!ReadHCD(hcd_path, &hc)) {
    Rprintf( "ERROR: reading %s failed. Aborted.\n",  hcd_path);
    goto fail;
  }

  Rprintf("- reading compressed item sequence from %s\n", huf_path);
  if (!BFopen(huf_path, "r", &bfd)) {
    Rprintf( "ERROR: can't open file %s. Aborted.\n", huf_path);
    perror(huf_path);
    goto fail;
  }
  bfd_is_open = 1;

  Rprintf("- reading sync (mod %d) from %s\n", SYNCHRONIZATION, sync_path);
  /* the sync file holds binary integers, so open it in binary mode */
  if ((sync = fopen(sync_path, "rb")) == NULL) {
    Rprintf( "ERROR: can't open file %s. Aborted.\n", sync_path);
    perror(sync_path);
    goto fail;
  }

  size = cl_max_cpos(attr);
  if (size != hc.length) {
    Rprintf( "ERROR: wrong corpus size (%d tokens) in %s (correct size: %d)\n",
            hc.length, hcd_path, size);
    goto fail;
  }

  for (pos = 0; pos < hc.length; pos++) {

    if ((pos % SYNCHRONIZATION) == 0) {
      offset = BFposition(&bfd); /* need to get offset before flushing (because flushing fills the bit buffer and advances offset to the following byte!) */
      if (pos > 0)
        BFflush(&bfd);
      sync_offset = -1;                /* make sure we get an error if read below fails */
      NreadInt(&sync_offset, sync);
      if (offset != sync_offset) {
        Rprintf( "ERROR: wrong sync offset %d (true offset %d) at cpos %d. Aborted.\n",
                sync_offset, offset, pos);
        goto fail;
      }
    }

    if (!BFread(&bit, 1, &bfd)) {
      Rprintf( "ERROR reading file %s. Aborted.\n", huf_path);
      goto fail;
    }

    /* extend the code bit by bit until it reaches the smallest code of its length */
    /* NOTE(review): l is not bounded here; a damaged code descriptor could make
       this loop index past the end of hc.min_code -- confirm the table size. */
    v = (bit ? 1 : 0);
    l = 1;
    while (v < hc.min_code[l]) {
      if (!BFread(&bit, 1, &bfd)) {
        Rprintf( "ERROR reading file %s. Aborted.\n", huf_path);
        goto fail;
      }
      v <<= 1;
      if (bit)
        v++;
      l++;
    }
    item = hc.symbols[hc.symindex[l] + v - hc.min_code[l]];

    true_item = cl_cpos2id(attr, pos);
    if (item != true_item) {
      Rprintf( "ERROR: wrong token (id=%d) at cpos %d (correct id=%d). Aborted.\n",
              item, pos, true_item);
      /* must really abort: otherwise the message below would tell the user
         that the uncompressed data can be deleted although validation failed */
      goto fail;
    }

  }
  fclose(sync);
  BFclose(&bfd);

  /* tell the user it's safe to delete the CORPUS component now */
  Rprintf("!! You can delete the file <%s> now.\n",
         component_full_name(attr, CompCorpus, NULL));
  
  return;

 fail:
  /* release whatever was opened before reporting the error, since
     rcqp_receive_error() may not come back */
  if (sync != NULL)
    fclose(sync);
  if (bfd_is_open)
    BFclose(&bfd);
  rcqp_receive_error(1);
  return;
}