Beispiel #1
0
/*
 * Validates the sample data of an SLXN trace chunk.
 *
 * The chunk must carry an "OFFS" metadata value.  After skipping the first
 * two bytes of chunk->data, the data is read as four consecutive runs of
 * trc->seq_len big-endian 16-bit samples.  Once the OFFS value has been
 * subtracted, every sample in runs 0 and 2 must equal 1 and every sample
 * in runs 1 and 3 must equal -1.
 *
 * No length check is made here; the caller is expected to have verified
 * that the chunk holds at least seq_len * 8 + 2 bytes.
 *
 * Returns 0 if every sample matches, -1 (after printing a message on
 * stdout) otherwise.
 */
int check_slxn_trace(ztr_t *ztr, ztr_chunk_t *chunk, Trace_reader *trc) {
  char *offs_text = ztr_lookup_mdata_value(ztr, chunk, "OFFS");
  int   shift;
  int   channel;

  if (NULL == offs_text) {
    printf("Couldn't get OFFS metadata from SLXN trace chunk\n");
    return -1;
  }
  shift = atoi(offs_text);

  for (channel = 0; channel < 4; channel++) {
    /* Even-numbered runs hold +1, odd-numbered runs hold -1. */
    const int expected = (channel & 1) ? -1 : 1;
    uint8_t  *samples  =
      (uint8_t *) chunk->data + channel * trc->seq_len * 2 + 2;
    size_t    pos;

    for (pos = 0; pos < trc->seq_len; pos++) {
      /* Samples are stored most significant byte first. */
      uint16_t raw = (((uint16_t) samples[pos * 2] << 8)
                      | samples[pos * 2 + 1]);

      if (raw - shift != expected) {
        printf("Incorrect SLXN data\n");
        return -1;
      }
    }
  }

  return 0;
}
Beispiel #2
0
/*
 * Prints a size summary for every chunk of a ZTR file.
 *
 * The ZTR data is read from the file named by argv[1] or, when no argument
 * is given, from standard input.  For each chunk the four character type
 * (plus its "TYPE" metadata value, if any) is printed, explode_chunk() is
 * called on it, and a SUMMARY line reports the metadata length, the data
 * length before and after explode_chunk(), and the ratio of the two.
 *
 * Returns 0 on success, 1 if the input cannot be opened or read.
 */
int main(int argc, char **argv) {
    mFILE *in;
    ztr_t *trace;
    int n;

    if (argc < 2) {
        in = mstdin();
    } else if (NULL == (in = mfopen(argv[1], "rb"))) {
        perror(argv[1]);
        return 1;
    }

    trace = mfread_ztr(in);
    if (NULL == trace) {
        perror("fread_ztr");
        return 1;
    }

    printf("Nchunks = %d\n", trace->nchunks);
    for (n = 0; n < trace->nchunks; n++) {
        char tag[5];
        char *kind;
        int packed_len;
        int unpacked_len;

        (void)ZTR_BE2STR(trace->chunk[n].type, tag);

        /* Remember the stored length before the chunk is exploded. */
        packed_len = trace->chunk[n].dlength;

        kind = ztr_lookup_mdata_value(trace, &trace->chunk[n], "TYPE");
        if (NULL == kind)
            printf("-- %s --\n", tag);
        else
            printf("-- %s (%s) --\n", tag, kind);

        explode_chunk(trace, &trace->chunk[n]);
        unpacked_len = trace->chunk[n].dlength;

        printf("SUMMARY %s  mlen %3d, dlen %6d, rawlen %6d, ratio %f\n",
               tag, trace->chunk[n].mdlength,
               packed_len, unpacked_len, (double)packed_len/unpacked_len);
#if 0
        fflush(stdout);
        puts("\n========================================");
        write(1, trace->chunk[n].data, trace->chunk[n].dlength);
        puts("\n========================================");
#endif
    }

    delete_ztr(trace);

    return 0;
}
Beispiel #3
0
/*
 * Checks a trace chunk against the base calls held in trc.
 *
 * The chunk must hold at least trc->seq_len * 8 + 2 bytes of data.  The
 * checker is then chosen from the chunk's "TYPE" metadata value, which
 * defaults to "PROC" when absent:
 *   "PROC"              -> check_proc_trace()
 *   begins with "SLXI"  -> check_slxi_trace()
 *   begins with "SLXN"  -> check_slxn_trace()
 *
 * Returns the result of the selected checker, or -1 if the chunk is too
 * short (message on stderr) or the type is not recognised (message on
 * stdout).
 */
int check_trace(ztr_t *ztr, ztr_chunk_t *chunk, Trace_reader *trc) {
  char *type;

  if (chunk->dlength < trc->seq_len * 8 + 2) {
    fprintf(stderr,
            "Trace is not long enough to account for all base calls\n");
    return -1;
  }

  type = ztr_lookup_mdata_value(ztr, chunk, "TYPE");
  if (NULL == type) {
    type = "PROC";
  }

  if (0 == strcmp(type, "PROC")) {
    return check_proc_trace(chunk, trc);
  }

  /* Only the first four characters are inspected for the SLX types. */
  if (0 == strncmp(type, "SLX", 3)) {
    if ('I' == type[3]) {
      return check_slxi_trace(chunk, trc);
    }
    if ('N' == type[3]) {
      return check_slxn_trace(ztr, chunk, trc);
    }
  }

  printf("Unexpected trace type '%s' found\n", type);
  return -1;
}
Beispiel #4
0
/*
 * Compresses a ztr (in memory).
 * Level is 0, 1, 2 or 3 (no compression, delta, delta + zlib,
 * chebyshev + zlib).
 *
 * Every chunk in ztr->chunk[] is visited and, depending on its type, run
 * through a fixed sequence of compress_chunk() calls.  Chunk types that have
 * no case below are left untouched.  When ILLUMINA_GA is defined at compile
 * time an alternative sequence is used for several chunk types.
 *
 * The return values of compress_chunk() are not inspected; this function
 * always returns 0.
 */
int compress_ztr(ztr_t *ztr, int level) {
    int i;

    /* Level 0 means "leave everything as it is". */
    if (0 == level)
	return 0;

    for (i = 0; i < ztr->nchunks; i++) {
	/*
	{
	    char str[5];
	    fprintf(stderr, "---- %.4s ----\n",
		    ZTR_BE2STR(ztr->chunk[i].type,str));
	}
	fprintf(stderr, "Uncomp length=%d\n", ztr->chunk[i].dlength);
	*/

	switch(ztr->chunk[i].type) {
	    /* Only assigned and read in the non-ILLUMINA_GA sample branch. */
	    char *type;
	case ZTR_TYPE_SAMP:
	case ZTR_TYPE_SMP4:
#ifdef ILLUMINA_GA
	    compress_chunk(ztr, &ztr->chunk[i],
			   ZTR_FORM_STHUFF, CODE_TRACES, 0);
#else
	    /* The filter chain depends on the chunk's "TYPE" metadata. */
	    type = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE");
	    if (type && 0 == strcmp(type, "PYRW")) {
		/* Raw data is not really compressable */
	    } else if (type && 0 == strcmp(type, "PYNO")) {
		/* PYNO chunks are only touched above level 1. */
		if (level > 1) {
		    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_16TO8,  0, 0);
		    compress_chunk(ztr, &ztr->chunk[i],
				   ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY, 0);
		}
	    } else {
		/* Any other (or missing) TYPE: levels 1-2 use DELTA2, level 3+ ICHEB. */
		if (level <= 2) {
		    /*
		     * Experiments show that typically a double delta does
		     * better than a single delta for 8-bit data, and the other
		     * way around for 16-bit data
		     */
		    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_DELTA2,
				   ztr->delta_level, 0);
		} else {
		    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_ICHEB,  0, 0);
		}
		
		compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_16TO8,  0, 0);
		if (level > 1) {
		    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_FOLLOW1,0, 0);
		    /*
		      compress_chunk(ztr, &ztr->chunk[i],
		                     ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY);
		    */
		    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_RLE,  150, 0);
		    compress_chunk(ztr, &ztr->chunk[i],
		    		   ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY, 0);
		}
	    }
#endif
	    break;

	case ZTR_TYPE_BASE:
#ifdef ILLUMINA_GA
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_STHUFF, CODE_DNA, 0);
#else
	    /* Base calls are left as they are at level 1. */
	    if (level > 1) {
		compress_chunk(ztr, &ztr->chunk[i],
			       ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY, 0);
	    }
#endif
	    break;

	case ZTR_TYPE_CNF1:
	case ZTR_TYPE_CNF4:
	case ZTR_TYPE_CSID:
#ifdef ILLUMINA_GA
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_RLE,  77, 0);
	    compress_chunk(ztr, &ztr->chunk[i],
			   ZTR_FORM_STHUFF, CODE_CONF_RLE, 0);
#else
	    /* DELTA1 and RLE are applied at every level, ZLIB only above 1. */
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_DELTA1, 1, 0);
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_RLE,  77, 0);
	    if (level > 1) {
		compress_chunk(ztr, &ztr->chunk[i],
			       ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY, 0);
	    }
#endif
	    break;

	case ZTR_TYPE_BPOS:
	    /* Same sequence whether or not ILLUMINA_GA is defined. */
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_DELTA4, 1, 0);
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_32TO8,  0, 0);
	    if (level > 1) {
		compress_chunk(ztr, &ztr->chunk[i],
			       ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY, 0);
	    }
	    break;

	case ZTR_TYPE_TEXT:
#ifdef ILLUMINA_GA
	    /* Text chunks are not compressed at all in ILLUMINA_GA builds. */
#else
	    if (level > 1) {
		compress_chunk(ztr, &ztr->chunk[i],
			       ZTR_FORM_ZLIB, Z_HUFFMAN_ONLY, 0);
	    }
#endif
	    break;

	case ZTR_TYPE_FLWO:
	    /* Applied at every non-zero level. */
	    compress_chunk(ztr, &ztr->chunk[i], ZTR_FORM_XRLE, 0, 4);
	    break;

	}

	/*
	fprintf(stderr, "Comp length=%d\n", ztr->chunk[i].dlength);
	*/
    }

    return 0;
}
Beispiel #5
0
/*
 * Works out, for every base position, which channel carries the second
 * highest trace value.
 *
 * The first SMP4 chunk whose "TYPE" metadata is absent, "PROC" or "SLXI"
 * is uncompressed and read as a two byte header followed by four runs of
 * nbases big-endian 16-bit samples.  When the type is absent or starts
 * with 'P' the runs are taken in stored order as channels 0..3; otherwise
 * runs 0/1 and runs 2/3 are swapped.
 *
 * On success *indexes is set to an array of nbases ints obtained from
 * smalloc().  Each entry is the channel number of the second highest
 * sample, renumbered to 0..2 by skipping the channel holding the highest
 * sample.
 *
 * Returns 0 on success, 1 if no suitable SMP4 chunk exists, or -1 (after
 * printing a message on stdout) if the chunk cannot be uncompressed or its
 * length is not nbases * 8 + 2.
 */
int get_second_calls(ztr_t *ztr, size_t nbases, int **indexes) {
  ztr_chunk_t *found = NULL;
  char        *type  = NULL;
  uint8_t     *chan[4];
  uint8_t     *samples;
  size_t       n;
  int          swap;
  int          c;

  /* Stop at the first SMP4 chunk with an acceptable type. */
  for (n = 0; n < ztr->nchunks && NULL == found; n++) {
    if (ZTR_TYPE_SMP4 != ztr->chunk[n].type) continue;

    type = ztr_lookup_mdata_value(ztr, &ztr->chunk[n], "TYPE");
    if (NULL == type
        || 0 == strcmp(type, "PROC")
        || 0 == strcmp(type, "SLXI")) {
      found = &ztr->chunk[n];
    }
  }

  if (NULL == found) return 1;

  if (0 != uncompress_chunk(ztr, found)) {
    printf("Couldn't uncompresss SMP4 chunk\n");
    return -1;
  }

  if (found->dlength != nbases * 8 + 2) {
    printf("Trace and basecalls have different number of samples\n");
    return -1;
  }

  *indexes = smalloc(nbases * sizeof(int));

  /*
   * XOR-ing the channel number with 1 exchanges 0<->1 and 2<->3, which is
   * the layout used when the type does not start with 'P'.
   */
  swap    = (NULL == type || type[0] == 'P') ? 0 : 1;
  samples = (uint8_t *) found->data + 2;
  for (c = 0; c < 4; c++) {
    chan[c] = samples + nbases * 2 * (size_t) (c ^ swap);
  }

  for (n = 0; n < nbases; n++) {
    uint16_t v[4];
    int      best;
    int      runner_up;

    for (c = 0; c < 4; c++) {
      v[c] = ((uint16_t) chan[c][n * 2] << 8) | chan[c][n * 2 + 1];
    }

    /* Seed with channels 0 and 1; a tie keeps channel 0 as the best. */
    best      = (v[1] > v[0]) ? 1 : 0;
    runner_up = 1 - best;

    for (c = 2; c < 4; c++) {
      if (v[c] > v[best]) {
        runner_up = best;
        best      = c;
      } else if (v[c] > v[runner_up]) {
        runner_up = c;
      }
    }

    /* Renumber as an index among the three channels that are not best. */
    (*indexes)[n] = (runner_up < best) ? runner_up : runner_up - 1;
  }

  return 0;
}
Beispiel #6
0
/*
 * Parse the REGN chunk, add to regn HASH
 *
 * Returns corresponding HashItem * from regn Hash
 */
HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
    char key[1024];
    char *name;
    HashItem *hi;
    regn_t *regn;
    size_t l;
    
    uncompress_chunk(z, chunk);

    /* the hash key is a combination of the region names and boundaries */
    name = ztr_lookup_mdata_value(z, chunk, "NAME");
    l = snprintf(key, sizeof(key), "names=%s", name);
    if( chunk->dlength ){
        int nbndy = (chunk->dlength-1)/4;
        uint4 *bndy = (uint4 *)(chunk->data+1);
        int ibndy;
	for (ibndy=0; ibndy<nbndy; ibndy++) {
            if( ibndy )
                l += snprintf(key + l, sizeof(key) - l,
			      ";%d", be_int4(bndy[ibndy]));
            else
                l += snprintf(key + l, sizeof(key) - l,
			      " boundaries=%d", be_int4(bndy[ibndy]));
        }
    }

    if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) {
        int iregion, nregions = 0;
        char *coord;
	char *cp1;
        uint4 bndy[MAX_REGIONS];
        int ibndy, nbndy = 0;
        HashData hd;

        if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) {
	    return NULL;
	}

	coord = ztr_lookup_mdata_value(z, chunk, "COORD");
	regn->coord = (NULL == coord ? 'B' : *coord );

	regn->region_names = strdup(name);

        cp1 = strtok (regn->region_names,";");
        while(cp1) {
            char *cp2;
            if(NULL == (cp2 = strchr(cp1,':'))) {
                fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
                return NULL;
            }
            *cp2++ = '\0';
            regn->name[nregions] = cp1;
            regn->code[nregions] = *cp2;
            nregions++;
            cp1 = strtok (NULL, ";");
        }

        regn->nregions = nregions;

	if( chunk->dlength ) {
            nbndy = (chunk->dlength-1)/4;
            memcpy(bndy, chunk->data+1, chunk->dlength-1);
	}

        for( iregion=0, ibndy=0; iregion<nregions; iregion++) {
            /* start = (start + length of previous region) or 0 if no previous region */
            /* length = (next boundary - start of region) or -1 if no next boundary */
            if( regn->code[iregion] == 'E' ){
                /* no sequence, length = 0 */
                regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
                regn->length[iregion] = 0;
            }else{
                if( ibndy > nbndy ){
                    fprintf(stderr, "More name/code pairs than boundaries\n");
                    return NULL;
                }
                regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
                regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
                ibndy++;
            }
        }

        regn->count = 1;
            
	hd.p = regn;
	if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) {
	    free(regn->region_names);
	    free(regn);
	    return NULL;
	}
    } else {
	regn = (regn_t *)(hi->data.p);
	regn->count++;
    }

    return hi;
}
/*
 * Writes one SMP4 trace chunk of the requested kind to fp.
 *
 * The SMP4 chunks are searched in order for the first one whose "TYPE"
 * metadata equals want; when untyped_ok is non-zero a chunk with no TYPE
 * metadata matches as well.  The chunk's "OFFS" metadata (0 if absent) and
 * its data after the two byte header are handed to dump_samples4_solexa().
 * Nothing is written if no chunk matches.
 *
 * NOTE(review): the array returned by ztr_find_chunks() is freed here on
 * the understanding that io_lib allocates it with malloc/realloc and hands
 * ownership to the caller - confirm against the io_lib version in use.
 */
static void dump_solexa_smp4(ztr_t *z, const char *want, int untyped_ok,
                             FILE *fp, int lane, int tile, int x, int y) {
    ztr_chunk_t **chunks;
    int i, nc;

    chunks = ztr_find_chunks(z, ZTR_TYPE_SMP4, &nc);
    for (i = 0; i < nc; i++) {
        char *type = ztr_lookup_mdata_value(z, chunks[i], "TYPE");
        char *offs;

        if (type ? (0 != strcmp(type, want)) : !untyped_ok) {
            continue;
        }

        offs = ztr_lookup_mdata_value(z, chunks[i], "OFFS");
        dump_samples4_solexa(fp, lane, tile, x, y,
                             offs ? atoi(offs) : 0,
                             (uc *)chunks[i]->data+2,
                             chunks[i]->dlength-2);
        break;
    }
    free(chunks);
}

/*
 * Prints a read in solexa format.  Depending on the given chunk type mode,
 * only some of the chunks are printed out for every read.  The files are
 * given in the following order: seq, prb, sig2, int, nse.  The files must
 * already be open.
 *
 * mode is a bitwise OR of SEQ, PRB, SIG2, INT and NSE.  The lane, tile and
 * coordinates are taken from name via parse_name().  Exactly one BASE
 * chunk is required, and exactly one CNF4 chunk when PRB is requested;
 * otherwise a message is printed on stderr and the function returns
 * without writing anything further.  SIG2 uses the SMP4 chunk of type
 * "PROC" (or with no type), INT the one of type "SLXI" and NSE the one of
 * type "SLXN".
 *
 * NOTE(review): chunk arrays from ztr_find_chunks() are freed once used, on
 * the understanding that io_lib gives ownership of them to the caller -
 * confirm against the io_lib version in use.
 */
void dump_solexa(ztr_t *z, char *name, char mode, FILE **files) {
    ztr_chunk_t **chunks;
    char *seq;
    int nc;
    /* Zeroed so the output is defined even if parse_name() sets nothing. */
    int lane = 0, tile = 0, x = 0, y = 0;

    parse_name(name, &lane, &tile, &x, &y);

    uncompress_ztr(z);

    chunks = ztr_find_chunks(z, ZTR_TYPE_BASE, &nc);
    if (nc != 1) {
        fprintf(stderr, "Zero or greater than one BASE chunks found.\n");
        free(chunks);
        return;
    }
    /* seq points into the chunk's data, not into the chunks[] array. */
    seq = chunks[0]->data+1;

    /* Sequence */
    if (mode & SEQ) {
        /* The '*' precision of %.*s must be passed as an int. */
        fprintf(files[0], "%d\t%d\t%d\t%d\t%.*s\n",
                lane, tile, x, y,
                (int)(chunks[0]->dlength-1),
                seq);
    }
    free(chunks);

    /* Confidence */
    if (mode & PRB) {
        chunks = ztr_find_chunks(z, ZTR_TYPE_CNF4, &nc);
        if (nc != 1) {
            fprintf(stderr, "Zero or greater than one CNF chunks found.\n");
            free(chunks);
            return;
        }

        dump_conf4_solexa(files[1], seq, (sc *)chunks[0]->data+1,
                          chunks[0]->dlength-1);
        free(chunks);
    }

    /* Traces */
    if (mode & SIG2) {
        dump_solexa_smp4(z, "PROC", 1, files[2], lane, tile, x, y);
    }

    if (mode & INT) {
        dump_solexa_smp4(z, "SLXI", 0, files[3], lane, tile, x, y);
    }

    if (mode & NSE) {
        dump_solexa_smp4(z, "SLXN", 0, files[4], lane, tile, x, y);
    }
}