/* * Counts the contents of an SRF file. * If the hash index exists it uses this instead. * * Returns num_reads for success * -1 for failure */ int64_t count_file(char *fname, opts *opts) { srf_t *srf; srf_index_hdr_t hdr; off_t ipos, skip; int item_sz = 9; if (NULL == (srf = srf_open(fname, "r"))) { perror(fname); return -1; } /* Read the index header */ if (0 != srf_read_index_hdr(srf, &hdr, 0)) { srf_destroy(srf, 1); return list_file(fname, opts); } ipos = ftello(srf->fp); /* Compute the remaining size of the index and divide by item_sz */ if (hdr.dbh_pos_stored_sep) item_sz += 4; skip = hdr.index_hdr_sz + hdr.n_container * 8 + hdr.n_data_block_hdr * 8 + hdr.n_buckets * 8; srf_destroy(srf, 1); return (hdr.size - skip - 16/* footer*/) / item_sz; }
/* * Lists the contents of an SRF file. * * Returns num_reads for success * -1 for failure */ int64_t list_file(char *fname, opts *opts) { srf_t *srf; char name[512]; int64_t count = 0; int type; uint64_t pos; if (NULL == (srf = srf_open(fname, "r"))) { perror(fname); return -1; } /* Scan through file gathering the details to index in memory */ while ((type = srf_next_block_details(srf, &pos, name)) >= 0) { if (type == SRFB_TRACE_BODY) { count++; if (!opts->count_only) { if (opts->long_format) printf("%-30s %10"PRId64" + %4d + %5d\n", name, pos, srf->tb.trace_size, srf->th.trace_hdr_size); else puts(name); } } } srf_destroy(srf, 1); return count; }
int check_srf(char *srf_name, Settings *opts) { FILE *fp = fopen(srf_name, "rb"); srf_t *srf; int res; if (NULL == fp) { printf("Couldn't open %s: %s\n", srf_name, strerror(errno)); return -1; } srf = srf_create(fp); if (NULL == srf) { printf("srf_create failed for %s\n", srf_name); fclose(fp); return -1; } res = read_srf(srf, srf_name, opts); srf_destroy(srf, 0); if (0 != fclose(fp)) { printf("Error closing %s: %s\n", srf_name, strerror(errno)); return -1; } return res; }
/* * Given the archive name (ar_name), the chunk types to output (chunk_mode), * and some other parameters such as the read filter, open one file and dumps * text formatted output to that file. The number of reads is updated in the * \"num_reads\" parameter. * * Returns 0 on success. */ int process_srf_to_text_files(char *ar_name, char chunk_mode, int num_reads_only_mode, int filter_mode, read_filter_t *read_filter, long *num_reads) { srf_t *srf; char name[1024], dir2[1024]; ztr_t *ztr; int last_lane = 0, last_tile = 0; /* dump */ FILE *files[] = {NULL}; if (NULL == (srf = srf_open(ar_name, "rb"))) { perror(ar_name); return 1; } char *cp = strrchr(ar_name, '.'); if (cp) *cp = 0; sprintf(dir2, "%s.run", ar_name); mkdir(dir2, 0777); while (NULL != (ztr = srf_next_ztr(srf, name, 0))) { int lane, tile, x, y; parse_name(name, &lane, &tile, &x, &y); if (last_lane != lane || last_tile != tile) { fprintf(stderr, "New tile: %d/%d\n", lane, tile); last_lane = lane; last_tile = tile; if (files[0]) { fclose(files[0]); files[0] = NULL; } if (chunk_mode) { files[0] = fopen_text(dir2, lane, tile); } } if(!num_reads_only_mode) { if(!filter_mode || (filter_mode && check_read_name(read_filter, name))) { dump_text(ztr, name, chunk_mode, files); ++(*num_reads); } } else { ++(*num_reads); } delete_ztr(ztr); } if (files[0]) fclose(files[0]); srf_destroy(srf, 1); return 0; }
/* * Given the archive name and the level_mode * generate information about the archive * * Note the generated srf file is NOT indexed * * Returns 0 on success. */ int srf_info(char *input, int level_mode, long *read_count, long *chunk_count, uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES], HashTable *regn_hash, uint64_t *base_count) { srf_t *srf; off_t pos; int type; int count = 0; long trace_body_count = 0; char name[1024]; if (NULL == (srf = srf_open(input, "rb"))) { perror(input); return 1; } while ((type = srf_next_block_type(srf)) >= 0) { switch (type) { case SRFB_CONTAINER: if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } if (0 != srf_read_cont_hdr(srf, &srf->ch)) { fprintf(stderr, "Error reading container header.\nExiting.\n"); exit(1); } break; case SRFB_XML: if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } if (0 != srf_read_xml(srf, &srf->xml)) { fprintf(stderr, "Error reading XML.\nExiting.\n"); exit(1); } break; case SRFB_TRACE_HEADER: if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } if (0 != srf_read_trace_hdr(srf, &srf->th)) { fprintf(stderr, "Error reading trace header.\nExiting.\n"); exit(1); } if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) ) break; /* Decode ZTR chunks in the header */ if (srf->mf) mfdestroy(srf->mf); srf->mf = mfcreate(NULL, 0); if (srf->th.trace_hdr_size) mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf); if (srf->ztr) delete_ztr(srf->ztr); mrewind(srf->mf); if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) { srf->mf_pos = mftell(srf->mf); } else { /* Maybe not enough to decode or no headerBlob. */ /* So delay until decoding the body. */ srf->mf_pos = 0; } mfseek(srf->mf, 0, SEEK_END); srf->mf_end = mftell(srf->mf); break; case SRFB_TRACE_BODY: { srf_trace_body_t old_tb; ztr_t *ztr_tmp; int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1); if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) { fprintf(stderr, "Error reading trace body.\nExiting.\n"); exit(1); } if (-1 == construct_trace_name(srf->th.id_prefix, (unsigned char *)old_tb.read_id, old_tb.read_id_length, name, 512)) { fprintf(stderr, "Error constructing trace name.\nExiting.\n"); exit(1); } trace_body_count++; if( 1 == trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix)); } read_count[READ_TOTAL]++; if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){ read_count[READ_BAD]++; } else { read_count[READ_GOOD]++; } if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) ) break; if (!srf->mf) { fprintf(stderr, "Error reading trace body.\nExiting.\n"); exit(1); } mfseek(srf->mf, srf->mf_end, SEEK_SET); if (old_tb.trace_size) { mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf); free(old_tb.trace); old_tb.trace = NULL; } mftruncate(srf->mf, mftell(srf->mf)); mfseek(srf->mf, srf->mf_pos, SEEK_SET); if (srf->ztr) ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */ else ztr_tmp = NULL; if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) { int i; for (i=0; i<ztr_tmp->nchunks; i++) { int ichunk = -1; switch (ztr_tmp->chunk[i].type) { case ZTR_TYPE_BASE: ichunk = CHUNK_BASE; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){ delete_ztr(ztr_tmp); return 1; } break; case ZTR_TYPE_CNF1: ichunk = CHUNK_CNF1; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_CNF4: ichunk = CHUNK_CNF4; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_SAMP: ichunk = CHUNK_SAMP; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_SMP4: ichunk = CHUNK_SMP4; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_REGN: ichunk = CHUNK_REGN; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){ delete_ztr(ztr_tmp); return 1; } break; default: break; } if( ichunk > -1 ) { chunk_count[ichunk]++; count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count); } } } if( ztr_tmp ) delete_ztr(ztr_tmp); count++; if( (level_mode == LEVEL_CHECK) && (count == 10) ){ printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); srf_destroy(srf, 1); return 0; } break; } case SRFB_INDEX: { off_t pos = ftell(srf->fp); if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } printf( "Reading srf index block\n"); if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) { srf_destroy(srf, 1); fprintf(stderr, "Error reading srf index block header.\nExiting.\n"); exit(1); } /* Skip the index body */ fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET); break; } case SRFB_NULL_INDEX: { uint64_t ilen; if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } printf( "Reading srf null index block\n"); /* * Maybe the last 8 bytes of a the file (or previously was * last 8 bytes prior to concatenating SRF files together). * If so it's the index length and should always be 8 zeros. */ if (1 != fread(&ilen, 8, 1, srf->fp)) { srf_destroy(srf, 1); fprintf(stderr, "Error reading srf null index block.\nExiting.\n"); exit(1); } if (ilen != 0) { srf_destroy(srf, 1); fprintf(stderr, "Invalid srf null index block.\nExiting.\n"); exit(1); } break; } default: srf_destroy(srf, 1); fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type); exit(1); } } if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } /* the type should be -1 (EOF) */ if( type != -1 ) { fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type); exit(1); } /* are we really at the end of the srf file */ pos = ftell(srf->fp); fseek(srf->fp, 0, SEEK_END); if( pos != ftell(srf->fp) ){ fprintf(stderr, "srf file is corrupt\n"); exit(1); } srf_destroy(srf, 1); return 0; }
/* * Main method. */ int main(int argc, char **argv) { int i; long num_reads = 0; int num_reads_mode = 0; int num_reads_only_mode = 0; int filter_mode = 0; char *ar_name = NULL; srf_t *srf = NULL; char name[512]; ztr_t *ztr = NULL; read_filter_t *read_filter = NULL; char *filter_value = NULL; int c; int errflg = 0; extern char *optarg; extern int optind, optopt; char chunk_mode = ALL; char type_mode = TEXT; char destination_mode = CONSOLE_DEST; FILE **files = NULL; int verbose = 0; if (argc < 2) { fprintf(stderr, "Please specify an archive name.\n"); usage(1); } while ((c = getopt(argc, argv, ":c:d:f:not:v")) != -1) { switch (c) { case 'c': chunk_mode = 0; if(get_chunk_types(optarg, &chunk_mode) || !chunk_mode) { fprintf(stderr, "Invalid value \"%s\" given to option -%c.\n", optarg, c); errflg++; } break; case 'd': destination_mode = 0; if(get_destination_types(optarg, &destination_mode) || !destination_mode) { fprintf(stderr, "Invalid value \"%s\" given to option -%c.\n", optarg, c); errflg++; } break; case 'f': if (num_reads_only_mode) { fprintf(stderr, "Option -%c is exclusing with option -o.\n", c); errflg++; } else { filter_mode++; filter_value = optarg; } break; case 'n': if (num_reads_only_mode) { fprintf(stderr, "Option -%c is exclusing with option -o.\n", c); errflg++; } else num_reads_mode++; break; case 'o': if (num_reads_mode) { fprintf(stderr, "Option -%c is exclusing with option -n.\n", c); errflg++; } else if (filter_mode) { fprintf(stderr, "Option -%c is exclusing with option -f.\n", c); errflg++; } else num_reads_only_mode++; break; case 't': type_mode=0; if(get_type_of_output(optarg, &type_mode) || !type_mode) { fprintf(stderr, "Invalid value \"%s\" given to option -%c.\n", optarg, c); errflg++; } break; case 'v': verbose++; break; case ':': /* -? without operand */ fprintf(stderr, "Option -%c requires an operand\n", optopt); errflg++; break; case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); errflg++; } } if (errflg) { usage(1); } if(optind + 1 != argc) { fprintf(stderr, "The archive name must be the last argument.\n"); usage(1); } else { ar_name = argv[optind]; } if(filter_mode) { read_filter = get_read_filter(filter_value); if(verbose) { dump_read_filter(read_filter); } } if(chunk_mode && verbose) { dump_chunk_mode(chunk_mode); } if(type_mode && verbose) { dump_type_mode(type_mode); } if(destination_mode && verbose) { dump_destination_mode(destination_mode); } if (!access(ar_name, R_OK)) { if(verbose) { printf("Dumping from archive %s.\n", ar_name); } } else { fprintf(stderr, "Archive %s not found.\n", ar_name); } if(destination_mode & NONE_DEST) { if (NULL == (srf = srf_open(ar_name, "rb"))) { perror(ar_name); return 1; } while (NULL != (ztr = srf_next_ztr(srf, name, 0))) { if(!num_reads_only_mode) { if(!filter_mode || (filter_mode && check_read_name(read_filter, name))) { ++num_reads; } } else { ++num_reads; } delete_ztr(ztr); } } if(destination_mode & CONSOLE_DEST) { /* Cosole means stdout. */ files = malloc((sizeof(FILE *))*5); for(i = 0; i < 5; i++) { files[i] = stdout; } if (NULL == (srf = srf_open(ar_name, "rb"))) { perror(ar_name); return 1; } while (NULL != (ztr = srf_next_ztr(srf, name, 0))) { if(!num_reads_only_mode) { if(!filter_mode || (filter_mode && check_read_name(read_filter, name))) { if(type_mode & SOLEXA) { dump_solexa(ztr, name, chunk_mode, files); } else if(type_mode & TEXT) { dump_text(ztr, name, chunk_mode, files); } else { fprintf(stderr, "Assertion error on type_mode (%c).\nExiting.\n", type_mode); exit(1); } ++num_reads; } } else { ++num_reads; } delete_ztr(ztr); } } if(destination_mode & FILE_DEST) { if(type_mode & SOLEXA) { process_srf_to_solexa_files(ar_name, chunk_mode, num_reads_only_mode, filter_mode, read_filter, &num_reads); } else if(type_mode & TEXT) { process_srf_to_text_files(ar_name, chunk_mode, num_reads_only_mode, filter_mode, read_filter, &num_reads); } else { fprintf(stderr, "Assertion error on type_mode (%c).\nExiting.\n", type_mode); exit(1); } } if(num_reads_mode || num_reads_only_mode) { printf("\nReads: %ld\n", num_reads); } srf_destroy(srf, 1); return 0; }