/* * Converts a FILE * to an mFILE *. * Use this for wrapper functions to turn external prototypes requring * FILE * as an argument into internal code using mFILE *. */ mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { mFILE *mf; int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; /* Parse mode: * r = read file contents (if truncated => don't read) * w = write on close * a = position at end of buffer * x = position at same location as the original fp, don't seek on flush */ if (strchr(mode_str, 'r')) r = 1, mode |= MF_READ; if (strchr(mode_str, 'w')) w = 1, mode |= MF_WRITE | MF_TRUNC; if (strchr(mode_str, 'a')) w = a = 1, mode |= MF_WRITE | MF_APPEND; if (strchr(mode_str, 'b')) b = 1, mode |= MF_BINARY; if (strchr(mode_str, 'x')) x = 1; if (strchr(mode_str, '+')) { w = 1, mode |= MF_READ | MF_WRITE; if (a) r = 1; } if (r) { mf = mfcreate(NULL, 0); if (NULL == mf) return NULL; if (!(mode & MF_TRUNC)) { mf->data = mfload(fp, path, &mf->size, b); mf->alloced = mf->size; if (!a) fseek(fp, 0, SEEK_SET); } } else if (w) { /* Write - initialise the data structures */ mf = mfcreate(NULL, 0); if (NULL == mf) return NULL; } else { fprintf(stderr, "Must specify either r, w or a for mode\n"); return NULL; } mf->fp = fp; mf->mode = mode; if (x) { mf->mode |= MF_MODEX; } if (a) { mf->flush_pos = mf->size; fseek(fp, 0, SEEK_END); } return mf; }
/* * Converts a FILE * to an mFILE *. * Use this for wrapper functions to turn external prototypes requring * FILE * as an argument into internal code using mFILE *. */ mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { mFILE *mf; int r = 0, w = 0, a = 0, b = 0, mode = 0; /* Parse mode: * r = read file contents (if truncated => don't read) * w = write on close * a = position at end of buffer */ if (strchr(mode_str, 'r')) r = 1, mode |= MF_READ; if (strchr(mode_str, 'w')) w = 1, mode |= MF_WRITE | MF_TRUNC; if (strchr(mode_str, 'a')) w = a = 1, mode |= MF_WRITE | MF_APPEND; if (strchr(mode_str, 'b')) b = 1, mode |= MF_BINARY; if (strchr(mode_str, 'm')) mode|=MF_MEMORY; if (strchr(mode_str, '+')) { w = 1, mode |= MF_READ | MF_WRITE; if (a) r = 1; } if (r) { mf = mfcreate(NULL, 0); if (!(mode & MF_TRUNC)) { mf->data = mfload(fp, path, &mf->size, b); mf->alloced = mf->size; if (!a) fseek(fp, 0, SEEK_SET); } } else { /* Write - initialise the data structures */ mf = mfcreate(NULL, 0); } mf->fp = fp; mf->mode = mode; if (a) { mf->flush_pos = mf->size; fseek(fp, 0, SEEK_END); } return mf; }
/*
 * Creates and returns m_channel[0], the mFILE standing in for stdin.
 *
 * The mFILE is created empty on the first call and cached in m_channel[0];
 * later calls return the cached pointer. The original comment notes that
 * stdin is slurped in on the first attempted read, which happens elsewhere.
 *
 * Returns the mFILE * on success,
 *         NULL if mfcreate() fails (m_channel[0] is left NULL so a later
 *         call can retry).
 */
mFILE *mstdin(void) {
    if (m_channel[0])
        return m_channel[0];

    m_channel[0] = mfcreate(NULL, 0);
    if (NULL == m_channel[0])
        return NULL;    /* avoid dereferencing a failed allocation */

    m_channel[0]->fp = stdin;
    return m_channel[0];
}
/* Opens the assembler output identified by filename.
 *
 * The returned pointer is the "magic cookie" handed to all I/O functions;
 * NULL signals failure. On MS-DOS or PAWN_LIGHT builds the cookie comes
 * from fopen() in "w+t" mode, otherwise from mfcreate().
 */
void *pc_openasm(char *filename)
{
  void *cookie;

#if defined __MSDOS__ || defined PAWN_LIGHT
  cookie = fopen(filename,"w+t");
#else
  cookie = mfcreate(filename);
#endif

  return cookie;
}
/*
 * Stderr as an mFILE.
 * The code handles stderr by returning m_channel[2], but also checking
 * for stderr in fprintf (the common usage of it) to auto-flush.
 *
 * The mFILE is created on first use, bound to stderr with mode MF_WRITE,
 * and cached in m_channel[2].
 *
 * Returns the mFILE * on success,
 *         NULL if mfcreate() fails (m_channel[2] is left NULL so a later
 *         call can retry).
 */
mFILE *mstderr(void) {
    if (m_channel[2])
        return m_channel[2];

    m_channel[2] = mfcreate(NULL, 0);
    if (NULL == m_channel[2])
        return NULL;    /* avoid dereferencing a failed allocation */

    m_channel[2]->fp = stderr;
    m_channel[2]->mode = MF_WRITE;
    return m_channel[2];
}
/*
 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
 * an empty buffer which is physically written out only when mfflush or
 * mfclose are called.
 *
 * The mFILE is created on first use, bound to stdout with mode MF_WRITE,
 * and cached in m_channel[1].
 *
 * Returns the mFILE * on success,
 *         NULL if mfcreate() fails (m_channel[1] is left NULL so a later
 *         call can retry).
 */
mFILE *mstdout(void) {
    if (m_channel[1])
        return m_channel[1];

    m_channel[1] = mfcreate(NULL, 0);
    if (NULL == m_channel[1])
        return NULL;    /* avoid dereferencing a failed allocation */

    m_channel[1]->fp = stdout;
    m_channel[1]->mode = MF_WRITE;
    return m_channel[1];
}
/*
 * Fetches a file through hopen() and returns its contents as an mFILE.
 *
 * file  Trace name; substituted for every "%s" found in url.
 * url   URL template.
 *
 * The expanded URL is opened with hopen(), read in 8192 byte pieces into
 * a fresh in-memory mFILE, and the mFILE is rewound before being returned.
 *
 * Returns the mFILE * on success (caller owns it),
 *         NULL on any failure: open, allocation, read, write or close.
 */
mFILE *find_file_url(char *file, char *url) {
    char buf[8192], *cp;
    mFILE *mf = NULL;
    /* Leave room for one more copy of file plus the terminating NUL. */
    int maxlen = 8190 - strlen(file), len;
    hFILE *hf;

    /* Expand %s for the trace name */
    for (cp = buf; *url && cp - buf < maxlen; url++) {
        if (*url == '%' && *(url+1) == 's') {
            url++;
            cp += strlen(strcpy(cp, file));
        } else {
            *cp++ = *url;
        }
    }
    *cp++ = 0;

    if (!(hf = hopen(buf, "r")))
        return NULL;

    if (NULL == (mf = mfcreate(NULL, 0))) {
        /* Don't leak the open handle when the buffer can't be allocated. */
        hclose_abruptly(hf);
        return NULL;
    }

    /* buf is reused as the transfer buffer now that the URL is open. */
    while ((len = hread(hf, buf, sizeof buf)) > 0) {
        if (mfwrite(buf, len, 1, mf) <= 0) {
            hclose_abruptly(hf);
            mfdestroy(mf);
            return NULL;
        }
    }

    /* len < 0 means the final hread() reported an error. */
    if (hclose(hf) < 0 || len < 0) {
        mfdestroy(mf);
        return NULL;
    }

    mrewind(mf);
    return mf;
}
/*
 * Looks for a trace name in an SRF archive and returns the binary contents
 * if found, or NULL if not.
 *
 * srf      Open SRF archive; blocks are consumed from its current position.
 * tr_name  Full trace name, compared against the current trace header's
 *          id_prefix followed by the trace body's read_id.
 *
 * On a match the returned mFILE holds the trace header blob followed by the
 * trace body blob and has been rewound; the caller owns it.
 *
 * Returns NULL at end of file, on any block read error, on allocation
 * failure, or when a block of unknown type is met.
 */
mFILE *find_reading(srf_t *srf, char *tr_name) {
    do {
        int type;

        switch(type = srf_next_block_type(srf)) {
        case -1:
            /* EOF */
            return NULL;

        case SRFB_CONTAINER:
            if (0 != srf_read_cont_hdr(srf, &srf->ch))
                return NULL;
            break;

        case SRFB_XML:
            if (0 != srf_read_xml(srf, &srf->xml))
                return NULL;
            break;

        case SRFB_TRACE_HEADER:
            /*
             * Every trace header is read; skipping whole blocks whose
             * id_prefix does not match tr_name (via next_block_offset)
             * was previously sketched here but left disabled.
             */
            if (0 != srf_read_trace_hdr(srf, &srf->th))
                return NULL;
            break;

        case SRFB_TRACE_BODY: {
            mFILE *mf = mfcreate(NULL, 0);
            srf_trace_body_t tb;
            char name[512];

            if (!mf)
                return NULL;

            if (0 != srf_read_trace_body(srf, &tb, 0)) {
                /* Release the buffer allocated above before bailing out. */
                mfdestroy(mf);
                return NULL;
            }

            /* Bounded so an over-long prefix + id cannot overrun name[]. */
            snprintf(name, sizeof name, "%s%s",
                     srf->th.id_prefix, tb.read_id);
            if (strcmp(name, tr_name)) {
                /* Not the requested trace: discard and try the next block. */
                mfdestroy(mf);
                if (tb.trace)
                    free(tb.trace);
                continue;
            }

            if (srf->th.trace_hdr_size)
                mfwrite(srf->th.trace_hdr, 1,
                        srf->th.trace_hdr_size, mf);
            if (tb.trace_size)
                mfwrite(tb.trace, 1, tb.trace_size, mf);
            if (tb.trace)
                free(tb.trace);
            mrewind(mf);
            return mf;
        }

        case SRFB_INDEX: {
            off_t pos = ftello(srf->fp);

            /* hdr.size is only meaningful if the header was read. */
            if (0 != srf_read_index_hdr(srf, &srf->hdr, 1))
                return NULL;

            /* Skip the index body */
            fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET);
            break;
        }

        case SRFB_NULL_INDEX:
            break;

        default:
            fprintf(stderr, "Block of unknown type '%c'. Aborting\n", type);
            return NULL;
        }
    } while (1);

    return NULL;
}
/* Opens the assembler output identified by filename via mfcreate().
 *
 * The returned pointer is the "magic cookie" handed to all I/O functions;
 * NULL signals failure.
 */
void *pc_openasm(char *filename)
{
  void *cookie = mfcreate(filename);

  return cookie;
}
/*
 * Validates the ZTR chunks carried in the current SRF trace header.
 *
 * opts  Settings; verbosity > 1 prints each chunk type, and opts is passed
 *       on to check_text().
 * srf   SRF archive whose srf->th has just been read. srf->mf is reset and
 *       loaded with the header blob; srf->ztr, srf->mf_pos and srf->mf_end
 *       are updated.
 * seen  Copies of the REGN and TEXT chunks from earlier header blocks,
 *       filled in on first sight and compared against afterwards.
 *
 * Checks performed per chunk type:
 *   HUFF  accepted as-is.
 *   BPOS  after a 4 byte lead-in, each big-endian 32-bit value must equal
 *         its index.
 *   REGN  must be identical (metadata and data) to the first one seen.
 *   TEXT  stored on first sight; a differing TEXT chunk replaces the stored
 *         copy and is re-validated with check_text().
 *   Any other type is rejected.
 *
 * Returns 0 on success, -1 if any check fails. Allocation or mfwrite
 * failures are reported through die().
 */
int check_trace_header(Settings *opts, srf_t *srf, Previous_data *seen) {
    int i;
    int j;
    int n;
    uint8_t *data;
    uint32_t val;
    ztr_chunk_t *chunk;

    /* Reuse the existing mFILE when there is one, otherwise create it. */
    if (NULL != srf->mf) {
        mfrecreate(srf->mf, NULL, 0);
    } else {
        srf->mf = mfcreate(NULL, 0);
    }
    if (NULL == srf->mf)
        die("%s\n", strerror(errno));

    if (srf->th.trace_hdr_size) {
        if (1 != mfwrite(srf->th.trace_hdr, srf->th.trace_hdr_size, 1,
                         srf->mf)) {
            die("mfwrite failed: %s\n", strerror(errno));
        }
    }

    if (srf->ztr)
        delete_ztr(srf->ztr);

    mrewind(srf->mf);

    if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) {
        srf->mf_pos = mftell(srf->mf);
    } else {
        /* We expect this to work for srfs made by illumina2srf */
        printf("partial_decode_ztr failed for trace header\n");
        srf->mf_pos = 0;
        return -1;
    }

    mfseek(srf->mf, 0, SEEK_END);
    srf->mf_end = mftell(srf->mf);

    /* Go through chunks */
    for (i = 0; i < srf->ztr->nchunks; i++) {
        chunk = &srf->ztr->chunk[i];
        if (opts->verbosity > 1) {
            printf(" Chunk %d type %.4s\n", i, (char *) &chunk->type);
        }

        switch (chunk->type) {
        case ZTR_TYPE_HUFF:
            break;

        case ZTR_TYPE_BPOS:
            if (0 != uncompress_chunk(srf->ztr, chunk)) {
                printf("Couldn't uncompress BPOS chunk\n");
                return -1;
            }
            /* Skip the 4 byte lead-in; the rest is 4 bytes per entry. */
            n = (chunk->dlength - 4) / 4;
            for (j = 0; j < n; j++) {
                data = (uint8_t *) &chunk->data[j * 4 + 4];
                val = (   ((uint32_t) data[0]) << 24
                        | ((uint32_t) data[1]) << 16
                        | ((uint32_t) data[2]) <<  8
                        | ((uint32_t) data[3]));
                if (val != j) {
                    printf("BPOS data misses cycles\n");
                    return -1;
                }
            }
            break;

        case ZTR_TYPE_REGN:
            if (0 != uncompress_chunk(srf->ztr, chunk)) {
                printf("Couldn't uncompress REGN chunk\n");
                return -1;
            }
            if (NULL == seen->regn) {
                /* Copy REGN chunk */
                seen->regn_meta_sz = chunk->mdlength;
                seen->regn_meta = smalloc(seen->regn_meta_sz);
                memcpy(seen->regn_meta, chunk->mdata, seen->regn_meta_sz);
                seen->regn_sz = chunk->dlength;
                seen->regn = smalloc(seen->regn_sz);
                memcpy(seen->regn, chunk->data, seen->regn_sz);
            } else {
                /* Compare with last copy */
                if (seen->regn_meta_sz != chunk->mdlength
                    || seen->regn_sz != chunk->dlength
                    || 0 != memcmp(seen->regn_meta, chunk->mdata,
                                   seen->regn_meta_sz)
                    || 0 != memcmp(seen->regn, chunk->data,
                                   seen->regn_sz)) {
                    printf("REGN chunk changed between header blocks\n");
                    return -1;
                }
            }
            break;

        case ZTR_TYPE_TEXT:
            if (0 != uncompress_chunk(srf->ztr, chunk)) {
                /* Message used to say "REGN" here (copy/paste slip). */
                printf("Couldn't uncompress TEXT chunk\n");
                return -1;
            }
            if (NULL == seen->text) {
                seen->text_sz = chunk->dlength;
                seen->text = smalloc(seen->text_sz);
                memcpy(seen->text, chunk->data, seen->text_sz);

                if (0 != check_text(opts, seen))
                    return -1;
            } else {
                if (seen->text_sz != chunk->dlength
                    || 0 != memcmp(seen->text, chunk->data,
                                   seen->text_sz)) {
                    /* A changed TEXT chunk is allowed but re-validated. */
                    printf("New TEXT chunk found\n");
                    seen->text_sz = chunk->dlength;
                    seen->text = srealloc(seen->text, seen->text_sz);
                    memcpy(seen->text, chunk->data, seen->text_sz);

                    if (0 != check_text(opts, seen))
                        return -1;
                }
            }
            break;

        default:
            printf("Found unexpected chunk type in header block\n");
            return -1;
        }
    }

    return 0;
}
mFILE *find_file_url(char *file, char *url) { char buf[8192], *cp; mFILE *mf = NULL, *headers = NULL; int maxlen = 8190 - strlen(file); static CURL *handle = NULL; static int curl_init = 0; char errbuf[CURL_ERROR_SIZE]; *errbuf = 0; if (!curl_init) { if (curl_global_init(CURL_GLOBAL_ALL)) return NULL; if (NULL == (handle = curl_easy_init())) goto error; curl_init = 1; } /* Expand %s for the trace name */ for (cp = buf; *url && cp - buf < maxlen; url++) { if (*url == '%' && *(url+1) == 's') { url++; cp += strlen(strcpy(cp, file)); } else { *cp++ = *url; } } *cp++ = 0; /* Setup the curl */ if (NULL == (mf = mfcreate(NULL, 0)) || NULL == (headers = mfcreate(NULL, 0))) return NULL; if (0 != curl_easy_setopt(handle, CURLOPT_URL, buf)) goto error; if (0 != curl_easy_setopt(handle, CURLOPT_CONNECTTIMEOUT, 60L)) goto error; if (0 != curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, (curl_write_callback)mfwrite)) goto error; if (0 != curl_easy_setopt(handle, CURLOPT_WRITEDATA, mf)) goto error; if (0 != curl_easy_setopt(handle, CURLOPT_HEADERFUNCTION, (curl_write_callback)mfwrite)) goto error; if (0 != curl_easy_setopt(handle, CURLOPT_WRITEHEADER, headers)) goto error; if (0 != curl_easy_setopt(handle, CURLOPT_ERRORBUFFER, errbuf)) goto error; /* Fetch! */ if (0 != curl_easy_perform(handle)) goto error; /* Report errors is approproate. 404 is silent as it may have just been * a search via RAWDATA path, everything else is worth reporting. */ { float version; int response; char nul = 0; mfwrite(&nul, 1, 1, headers); if (2 == sscanf(headers->data, "HTTP/%f %d", &version, &response)) { if (response != 200) { if (response != 404) fprintf(stderr, "%.*s\n", (int)headers->size, headers->data); goto error; } } } if (mftell(mf) == 0) goto error; mfdestroy(headers); mrewind(mf); return mf; error: if (mf) mfdestroy(mf); if (headers) mfdestroy(headers); if (*errbuf) fprintf(stderr, "%s\n", errbuf); return NULL; }
/* * Given the archive name and the level_mode * generate information about the archive * * Note the generated srf file is NOT indexed * * Returns 0 on success. */ int srf_info(char *input, int level_mode, long *read_count, long *chunk_count, uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES], HashTable *regn_hash, uint64_t *base_count) { srf_t *srf; off_t pos; int type; int count = 0; long trace_body_count = 0; char name[1024]; if (NULL == (srf = srf_open(input, "rb"))) { perror(input); return 1; } while ((type = srf_next_block_type(srf)) >= 0) { switch (type) { case SRFB_CONTAINER: if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } if (0 != srf_read_cont_hdr(srf, &srf->ch)) { fprintf(stderr, "Error reading container header.\nExiting.\n"); exit(1); } break; case SRFB_XML: if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } if (0 != srf_read_xml(srf, &srf->xml)) { fprintf(stderr, "Error reading XML.\nExiting.\n"); exit(1); } break; case SRFB_TRACE_HEADER: if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } if (0 != srf_read_trace_hdr(srf, &srf->th)) { fprintf(stderr, "Error reading trace header.\nExiting.\n"); exit(1); } if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) ) break; /* Decode ZTR chunks in the header */ if (srf->mf) mfdestroy(srf->mf); srf->mf = mfcreate(NULL, 0); if (srf->th.trace_hdr_size) mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf); if (srf->ztr) delete_ztr(srf->ztr); mrewind(srf->mf); if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) { srf->mf_pos = mftell(srf->mf); } else { /* Maybe not enough to decode or no headerBlob. */ /* So delay until decoding the body. 
*/ srf->mf_pos = 0; } mfseek(srf->mf, 0, SEEK_END); srf->mf_end = mftell(srf->mf); break; case SRFB_TRACE_BODY: { srf_trace_body_t old_tb; ztr_t *ztr_tmp; int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1); if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) { fprintf(stderr, "Error reading trace body.\nExiting.\n"); exit(1); } if (-1 == construct_trace_name(srf->th.id_prefix, (unsigned char *)old_tb.read_id, old_tb.read_id_length, name, 512)) { fprintf(stderr, "Error constructing trace name.\nExiting.\n"); exit(1); } trace_body_count++; if( 1 == trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix)); } read_count[READ_TOTAL]++; if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){ read_count[READ_BAD]++; } else { read_count[READ_GOOD]++; } if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) ) break; if (!srf->mf) { fprintf(stderr, "Error reading trace body.\nExiting.\n"); exit(1); } mfseek(srf->mf, srf->mf_end, SEEK_SET); if (old_tb.trace_size) { mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf); free(old_tb.trace); old_tb.trace = NULL; } mftruncate(srf->mf, mftell(srf->mf)); mfseek(srf->mf, srf->mf_pos, SEEK_SET); if (srf->ztr) ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */ else ztr_tmp = NULL; if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) { int i; for (i=0; i<ztr_tmp->nchunks; i++) { int ichunk = -1; switch (ztr_tmp->chunk[i].type) { case ZTR_TYPE_BASE: ichunk = CHUNK_BASE; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){ delete_ztr(ztr_tmp); return 1; } break; case ZTR_TYPE_CNF1: ichunk = CHUNK_CNF1; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_CNF4: ichunk = CHUNK_CNF4; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_SAMP: ichunk = CHUNK_SAMP; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_SMP4: ichunk = CHUNK_SMP4; 
chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; break; case ZTR_TYPE_REGN: ichunk = CHUNK_REGN; chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){ delete_ztr(ztr_tmp); return 1; } break; default: break; } if( ichunk > -1 ) { chunk_count[ichunk]++; count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count); } } } if( ztr_tmp ) delete_ztr(ztr_tmp); count++; if( (level_mode == LEVEL_CHECK) && (count == 10) ){ printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); srf_destroy(srf, 1); return 0; } break; } case SRFB_INDEX: { off_t pos = ftell(srf->fp); if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } printf( "Reading srf index block\n"); if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) { srf_destroy(srf, 1); fprintf(stderr, "Error reading srf index block header.\nExiting.\n"); exit(1); } /* Skip the index body */ fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET); break; } case SRFB_NULL_INDEX: { uint64_t ilen; if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } printf( "Reading srf null index block\n"); /* * Maybe the last 8 bytes of a the file (or previously was * last 8 bytes prior to concatenating SRF files together). * If so it's the index length and should always be 8 zeros. */ if (1 != fread(&ilen, 8, 1, srf->fp)) { srf_destroy(srf, 1); fprintf(stderr, "Error reading srf null index block.\nExiting.\n"); exit(1); } if (ilen != 0) { srf_destroy(srf, 1); fprintf(stderr, "Invalid srf null index block.\nExiting.\n"); exit(1); } break; } default: srf_destroy(srf, 1); fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type); exit(1); } } if( trace_body_count ){ if( level_mode & LEVEL_NAME ) printf( " ... 
%s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); trace_body_count = 0; } /* the type should be -1 (EOF) */ if( type != -1 ) { fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type); exit(1); } /* are we really at the end of the srf file */ pos = ftell(srf->fp); fseek(srf->fp, 0, SEEK_END); if( pos != ftell(srf->fp) ){ fprintf(stderr, "srf file is corrupt\n"); exit(1); } srf_destroy(srf, 1); return 0; }