/* Writes the alignment columns (primary alignment ids and alignment counts)
 * for an already existing sequence row through the alignment-data cursor.
 *
 *  cself                 writer; must not be NULL and must NOT have been
 *                        created with ewseq_co_AlignData
 *  rowid                 target row; the value 0 only flushes the
 *                        alignment-data cursor and writes nothing
 *  primary_alignment_id  per-read alignment ids; element count must equal the
 *                        length of the value read back via tmpkey_reader
 *  alignment_count       per-read counts; same element count as above
 *
 * Returns 0 on success or a non-zero rc describing the first failure.
 */
LIB_EXPORT rc_t CC TableWriterSeq_WriteAlignmentData(const TableWriterSeq* cself, int64_t rowid,
                                                     const TableWriterData* primary_alignment_id,
                                                     const TableWriterData* alignment_count)
{
    /* mutable alias; the public handle is const but internal state is updated */
    TableWriterSeq *const self = (TableWriterSeq *)cself;
    /* must stay named 'rc': the TW_COL_WRITE macro appears to read/write it */
    rc_t rc = 0;

    if (cself == NULL) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if (cself->options & ewseq_co_AlignData) {
        /* alignment data is written together with the row in this mode */
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcViolated);
        ALIGN_DBGERR(rc);
        return rc;
    }
    if (rowid == 0) {
        /* row 0 is a request to flush only */
        return TableWriter_Flush(cself->base, cself->alignd_cursor_id);
    }
    if (primary_alignment_id == NULL || alignment_count == NULL) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
        return rc;
    }

    /* read the row back and compare its length with the supplied ids */
    rc = TableReader_ReadRow(cself->tmpkey_reader, rowid);
    if (rc == 0 && cself->cols_read_tmpkey[1].len != primary_alignment_id->elements) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
    }
    if (rc != 0) {
        ALIGN_DBGERRP("nreads and primary_alignment_id length %u <> %lu", rc, cself->cols_read_tmpkey[1].len, primary_alignment_id->elements);
        return rc;
    }

    if (primary_alignment_id->elements != alignment_count->elements) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("primary_alignment_id and alignment_count length %u <> %lu", rc, primary_alignment_id->elements, alignment_count->elements);
        return rc;
    }

    if (cself->flush) {
        /* a pending flush request is consumed even if the flush itself fails */
        rc = TableWriter_Flush(cself->base, cself->alignd_cursor_id);
        self->flush = false;
    }
    if (rc != 0) {
        return rc;
    }

    rc = TableWriter_OpenRowId(cself->base, rowid, cself->alignd_cursor_id);
    if (rc != 0) {
        return rc;
    }

    TW_COL_WRITE(cself->base, cself->cols_alignd[0], *primary_alignment_id);
    TW_COL_WRITE(cself->base, cself->cols_alignd[1], *alignment_count);
    if (rc == 0) {
        rc = TableWriter_CloseRow(cself->base);
    }
    /* statistics are collected unconditionally (not gated on ewseq_co_WantMateStats) */
    if (rc == 0) {
        rc = TableWriterSeq_CollectStatistics(self, primary_alignment_id);
    }
    return rc;
}
/* Looks up a reference sequence object by id.
 *
 *  cmgr       manager to search; must not be NULL
 *  result     receives the sequence object; must not be NULL
 *  seq_id     sequence id text (not required to be NUL-terminated, the length
 *             is given separately); must not be NULL
 *  seq_id_sz  size of seq_id
 *
 * Returns 0 on success; rcParam/rcNull when a pointer argument is NULL,
 * otherwise whatever GetSeqInternal reports.
 */
LIB_EXPORT rc_t CC RefSeqMgr_GetSeq(RefSeqMgr const *const cmgr, RefSeq const **result,
                                    char const *seq_id, uint32_t seq_id_sz)
{
    rc_t rc;

    if (cmgr != NULL && result != NULL && seq_id != NULL) {
        rc = GetSeqInternal((RefSeqMgr *)cmgr, result, seq_id_sz, seq_id);
    }
    else {
        rc = RC(rcAlign, rcIndex, rcConstructing, rcParam, rcNull);
    }

    if (rc != 0) {
        /* NOTE(review): seq_id may be NULL on this path; confirm that the
         * debug formatter tolerates a NULL argument for "%.*s". */
        ALIGN_DBGERRP("SEQ_ID: '%.*s'", rc, seq_id_sz, seq_id);
    }
    return rc;
}
/* Opens the table found at 'path' for reading and builds a reference sequence
 * reader on top of it.
 *
 *  cself    receives the new reader (validated by TableReaderRefSeq_MakeTable)
 *  vmgr     database manager used to open the table; must not be NULL
 *  path     table path; must not be NULL
 *  options  forwarded to TableReaderRefSeq_MakeTable
 *  cache    forwarded to TableReaderRefSeq_MakeTable
 *
 * The local table reference is released before returning, whether or not
 * the reader was created. Returns 0 on success or a non-zero rc.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_MakePath(const TableReaderRefSeq** cself, const VDBManager* vmgr,
                                              const char* path, uint32_t options, size_t cache)
{
    const VTable* tbl = NULL;
    rc_t rc;

    if (vmgr != NULL && path != NULL) {
        rc = VDBManagerOpenTableRead(vmgr, &tbl, NULL, "%s", path);
        if (rc == 0) {
            rc = TableReaderRefSeq_MakeTable(cself, vmgr, tbl, options, cache);
            VTableRelease(tbl);
        }
    }
    else {
        rc = RC(rcAlign, rcType, rcConstructing, rcParam, rcNull);
    }

    if (rc != 0) {
        /* NOTE(review): path may be NULL on this path; confirm that the
         * debug formatter tolerates a NULL argument for "%s". */
        ALIGN_DBGERRP("table for %s", rc, path);
    }
    return rc;
}
/* Creates a reference sequence reader on an already opened table.
 *
 *  cself    receives the new reader on success; must not be NULL
 *  vmgr     not used by this function
 *  table    table to read from; must not be NULL
 *  options  errefseq_4NA selects the second data column (cols[1]) for reads
 *           instead of the first (cols[0])
 *  cache    passed to TableReader_Make for the main reader
 *
 * Row 1 of the table is read once through a temporary reader to pick up the
 * per-table values (MAX_SEQ_LEN, SEQ_ID, TOTAL_SEQ_LEN, CIRCULAR and the
 * optional MD5). Any failure in that step is now reported to the caller;
 * previously the rc was overwritten by the result of creating the main
 * reader, so e.g. an over-long SEQ_ID or an out-of-range TOTAL_SEQ_LEN was
 * silently ignored.
 *
 * Returns 0 on success. On failure the partially built object is released
 * with TableReaderRefSeq_Whack and *cself is left untouched.
 */
LIB_EXPORT rc_t CC TableReaderRefSeq_MakeTable(const TableReaderRefSeq** cself, const VDBManager* vmgr,
                                               const VTable* table, uint32_t options, size_t cache)
{
    rc_t rc = 0;
    TableReaderRefSeq* self = NULL;

    if( cself == NULL || table == NULL ) {
        rc = RC(rcAlign, rcType, rcConstructing, rcParam, rcNull);
    } else if( (self = calloc(1, sizeof(*self))) == NULL ) {
        rc = RC(rcAlign, rcType, rcConstructing, rcMemory, rcExhausted);
    } else {
        const TableReader* tmp;
        TableReaderColumn static_cols[] = {
            /* order important: the code below addresses these by index */
            {0, "MAX_SEQ_LEN", {NULL}, 0, 0},
            {0, "SEQ_ID", {NULL}, 0, 0},
            {0, "TOTAL_SEQ_LEN", {NULL}, 0, 0},
            {0, "CIRCULAR", {NULL}, 0, 0},
            {0, "MD5", {NULL}, 0, ercol_Optional},
            {0, NULL, {NULL}, 0, 0}
        };

        if( (rc = TableReader_Make(&tmp, table, static_cols, 0)) == 0 ) {
            if( (rc = TableReader_ReadRow(tmp, 1)) == 0 ) {
                self->max_seq_len = static_cols[0].base.u32[0];
                self->total_seq_len = static_cols[2].base.u64[0];
                /* the stored value must survive the assignment unchanged
                   (catches truncation if the member is narrower than 64 bits) */
                if( self->total_seq_len != static_cols[2].base.u64[0] ) {
                    rc = RC(rcAlign, rcType, rcConstructing, rcData, rcOutofrange);
                }
                if( static_cols[1].base.var != NULL ) {
                    if ( static_cols[ 1 ].len > sizeof( self->seq_id ) - 1 ) {
                        rc = RC( rcAlign, rcType, rcConstructing, rcBuffer, rcInsufficient );
                    } else {
                        string_copy( self->seq_id, ( sizeof self->seq_id ) - 1, static_cols[1].base.str, static_cols[1].len );
                        self->seq_id[ static_cols[ 1 ].len ] = '\0';
                    }
                }
                self->circular = static_cols[3].base.buul[0];
                if( static_cols[4].base.var != NULL ) {
                    /* NOTE(review): assumes the MD5 cell holds at least
                       sizeof(self->md5) bytes - confirm against the schema */
                    memcpy(self->md5, static_cols[4].base.var, sizeof(self->md5));
                    self->has_md5 = true;
                }
            }
            TableReader_Whack(tmp);
        }

        /* only build the main reader when the per-table values were obtained;
           this keeps the rc of the step above from being overwritten */
        if( rc == 0 ) {
            memcpy(self->cols, TableReaderRefSeq_cols, sizeof(TableReaderRefSeq_cols));
            if( options & errefseq_4NA) {
                self->cols[0].flags |= ercol_Skip;
                self->cols[1].flags &= ~ercol_Skip;
                self->read = &self->cols[1];
            } else {
                self->read = &self->cols[0];
            }
            self->seq_len = &self->cols[3];
            rc = TableReader_Make(&self->base, table, self->cols, cache);
        }
    }

    if( rc == 0 ) {
        *cself = self;
    } else {
        TableReaderRefSeq_Whack(self);
        ALIGN_DBGERRP("table for 0x%p", rc, table);
    }
    return rc;
}
/* Splits a ':'-separated list in place (separators are overwritten with NUL)
 * and offers each entry to the callback with a NULL volume.
 * Returns true as soon as the callback accepts an entry, false otherwise. */
static bool RefSeqMgr_ProbeServerList(char *list, RefSeqMgr_ForEachVolume_callback cb, void *data)
{
    char *cursor = list;

    for (;;) {
        char const *const entry = cursor;
        char *const colon = strchr(cursor, ':');

        if (colon != NULL) {
            cursor = colon + 1;
            *colon = 0;
        }
        if (cb(entry, NULL, data)) {
            return true;
        }
        if (colon == NULL) {
            return false;
        }
    }
}

/* Offers candidate reference locations to 'cb' until it returns true.
 *
 * Candidates are tried in this order:
 *   1. "." (current directory), with no volume;
 *   2. each entry of the "refseq/paths" configuration value;
 *   3. each entry produced by RefSeqMgr_KfgReadRepositories;
 *   4. every server/volume combination of the deprecated "refseq/servers"
 *      and "refseq/volumes" configuration values.
 * Lists are ':'-separated.
 *
 * Returns rcParam/rcNull for NULL arguments, the configuration read error of
 * the last step that was executed, or 0.
 */
rc_t RefSeqMgr_ForEachVolume(const RefSeqMgr* cself, RefSeqMgr_ForEachVolume_callback cb, void *data)
{
    char servers[4096];
    char volumes[4096];
    rc_t rc;

    if (cself == NULL || cb == NULL) {
        return RC(rcAlign, rcType, rcConstructing, rcParam, rcNull);
    }

    /* found in local dir */
    if (cb(".", NULL, data)) {
        return 0;
    }

    rc = RefSeqMgr_KfgReadStr(cself->kfg, "refseq/paths", servers, sizeof(servers));
    if (rc != 0) {
        ALIGN_DBGERRP("%s", rc, "RefSeqMgr_KfgReadStr(paths)");
        return rc;
    }
    if (servers[0] != '\0' && RefSeqMgr_ProbeServerList(servers, cb, data)) {
        return rc;
    }

    /* locate refseq servers/volumes in possibly multiple repositories;
       a read failure is logged but the buffer is still examined */
    rc = RefSeqMgr_KfgReadRepositories(cself->kfg, servers, sizeof(servers));
    if (rc != 0) {
        ALIGN_DBGERRP("%s", rc, "RefSeqMgr_KfgReadStr(refseq/repository/*)");
    }
    if (servers[0] != '\0' && RefSeqMgr_ProbeServerList(servers, cb, data)) {
        return rc;
    }

    /* servers and volumes are deprecated and optional */
    rc = RefSeqMgr_KfgReadStr(cself->kfg, "refseq/servers", servers, sizeof(servers));
    if (rc == 0) {
        rc = RefSeqMgr_KfgReadStr(cself->kfg, "refseq/volumes", volumes, sizeof(volumes));
    }
    if (rc != 0) {
        ALIGN_DBGERRP("%s", rc, "RefSeqMgr_KfgReadStr(servers/volumes)");
        return rc;
    }
    if (servers[0] == '\0' && volumes[0] == '\0') {
        return rc;
    }

    {
        char *srv_next = servers;
        char *srv_colon;
        bool hit = false;

        do {
            /* the volume list is tokenised destructively, so every server
               gets its own fresh copy */
            char vol[ 4096 ];
            char const *const server = srv_next;
            char *vol_next = vol;
            char *vol_colon;

            string_copy ( vol, sizeof vol, volumes, string_size( volumes ) );

            srv_colon = strchr(srv_next, ':');
            if (srv_colon != NULL) {
                srv_next = srv_colon + 1;
                *srv_colon = 0;
            }

            do {
                char const *const volume = vol_next;

                vol_colon = strchr(vol_next, ':');
                if (vol_colon != NULL) {
                    vol_next = vol_colon + 1;
                    *vol_colon = 0;
                }
                hit = cb(server, volume, data);
            } while (!hit && vol_colon != NULL);
        } while (!hit && srv_colon != NULL);
    }
    return rc;
}
/* Writes one row of sequence data.
 *
 *  cself  writer; must not be NULL
 *  data   row content; must not be NULL
 *  rowid  passed to TableWriter_OpenRow and read back afterwards to record
 *         the first unaligned / first half-aligned row
 *         NOTE(review): it is dereferenced unconditionally once the row is
 *         open, so callers presumably must pass a valid pointer - confirm.
 *
 * Before a row is opened the following is verified:
 *  - column defaults have been written (done here when cself->init is false);
 *  - quality is present and has as many elements as sequence;
 *  - read_start and read_len each have nreads elements;
 *  - with ewseq_co_AlignData: primary_alignment_id and alignment_count each
 *    have nreads elements;
 *  - no_quantize_mask, when given, has as many elements as quality;
 *  - unless ewseq_co_NoLabelData: label, label_start and label_len are either
 *    all given or all absent.
 *
 * Fixes relative to the previous version:
 *  - when growing the quantization buffer fails, the quantization loops and
 *    the quality write are skipped instead of writing past the end of the
 *    old (too small or NULL) buffer;
 *  - when neither ewseq_co_FullQuality nor ewseq_co_SaveQual is set, the
 *    QUALITY column for unaligned reads is taken from the quantized quality
 *    buffer; it used to be taken from the sequence bytes.
 *
 * Returns 0 on success or a non-zero rc on failure.
 */
LIB_EXPORT rc_t CC TableWriterSeq_Write(const TableWriterSeq* cself, const TableWriterSeqData* data, int64_t* rowid)
{
    /* must stay named 'rc': the TW_COL_WRITE* macros appear to read/write it */
    rc_t rc = 0;
    int lbl;

    if( cself == NULL || data == NULL ) {
        rc = RC( rcAlign, rcType, rcWriting, rcParam, rcNull);
        ALIGN_DBGERR(rc);
    } else if( !cself->init && (rc = TableWriteSeq_WriteDefaults(cself)) != 0 ) {
        ALIGN_DBGERR(rc);
    } else if( data->quality.buffer == NULL || data->sequence.elements != data->quality.elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, data->quality.buffer ? rcInconsistent : rcEmpty);
        ALIGN_DBGERRP("sequence and quality length %lu <> %lu", rc, data->sequence.elements, data->quality.elements);
    } else if( data->read_start.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and read_start length %u <> %lu", rc, data->nreads, data->read_start.elements);
    } else if( data->read_len.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent );
        ALIGN_DBGERRP("nreads and read_len length %u <> %lu", rc, data->nreads, data->read_len.elements);
    } else if( (cself->options & ewseq_co_AlignData) && data->primary_alignment_id.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and primary_alignment_id length %u <> %lu", rc, data->nreads, data->primary_alignment_id.elements);
    } else if( (cself->options & ewseq_co_AlignData) && data->alignment_count.elements != data->nreads ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("nreads and alignment_count length %u <> %lu", rc, data->nreads, data->alignment_count.elements);
    } else if( data->no_quantize_mask.buffer && data->no_quantize_mask.elements != data->quality.elements ) {
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("quality and no_quantize_mask length %u <> %lu", rc, data->quality.elements, data->no_quantize_mask.elements);
    } else if( !(cself->options & ewseq_co_NoLabelData) &&
               (lbl = ((data->label.buffer ? 1 : 0) + (data->label_start.buffer ? 1 : 0) + (data->label_len.buffer ? 1 : 0))) != 0 && lbl != 3 ) {
        /* the three label buffers must be given together or not at all */
        rc = RC(rcAlign, rcType, rcWriting, rcData, rcInconsistent);
        ALIGN_DBGERRP("LABEL %s", rc, "incomplete");
    } else if( (rc = TableWriter_OpenRow(cself->base, rowid, cself->cursor_id)) == 0 ) {
        if( cself->options & ewseq_co_AlignData ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_PRIMARY_ALIGNMENT_ID], data->primary_alignment_id);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_ALIGNMENT_COUNT], data->alignment_count);
            if (rc == 0) {
                rc = TableWriterSeq_CollectStatistics((TableWriterSeq *)cself, &data->primary_alignment_id);
            }
        }
        {
            /* remember the first row with no aligned reads and the first row
               with only some reads aligned
               NOTE(review): alignment_count.buffer is read here regardless of
               ewseq_co_AlignData - presumably callers always supply it */
            unsigned naligned = 0;
            unsigned i;
            const uint8_t* ac = data->alignment_count.buffer;

            for (i = 0; i < (unsigned)data->nreads; ++i) {
                if (ac[i] != 0) {
                    ++naligned;
                }
            }
            if (naligned == 0) {
                if (!cself->haveFirstUnaligned) {
                    ((TableWriterSeq *)cself)->firstUnaligned = *rowid;
                    ((TableWriterSeq *)cself)->haveFirstUnaligned = true;
                }
            } else if (naligned < (unsigned)data->nreads) {
                if (!cself->haveFirstHalfAligned) {
                    ((TableWriterSeq *)cself)->firstHalfAligned = *rowid;
                    ((TableWriterSeq *)cself)->haveFirstHalfAligned = true;
                }
            }
        }
        if( cself->options & ewseq_co_SaveRead ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ], data->sequence);
        } else {
            /* store bases only for reads that have no alignment */
            uint32_t i;
            const char* seq = data->sequence.buffer;
            const uint8_t* ac = data->alignment_count.buffer;
            const INSDC_coord_zero* rs = data->read_start.buffer;
            const INSDC_coord_len* rl = data->read_len.buffer;

            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_READ], NULL, 0);
            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_CSREAD], NULL, 0);
            for(i = 0; i < data->nreads; i++ ) {
                if( ac[i] == 0 ) {
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_READ], &seq[rs[i]], rl[i]);
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_CSREAD], &seq[rs[i]], rl[i]);
                }
            }
        }
        if( cself->options & ewseq_co_FullQuality ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_QUALITY], data->quality);
        } else {
            uint64_t i;
            const uint8_t* b = data->quality.buffer;
            /* false when the quantization buffer could not be grown */
            bool buf_ready = true;

            if( data->quality.elements > cself->qual_buf_sz ) {
                /* NOTE(review): the size is multiplied by element_bits (bits,
                   not bytes), which looks like an over-allocation - left as is */
                uint8_t* p = realloc(cself->qual_buf, data->quality.elements * cself->cols[ewseq_cn_QUALITY].element_bits);
                if( p == NULL ) {
                    rc = RC(rcAlign, rcType, rcWriting, rcMemory, rcExhausted);
                    ALIGN_DBGERRP("quality %s", rc, "discretization");
                    buf_ready = false;
                } else {
                    ((TableWriterSeq*)cself)->qual_buf_sz = data->quality.elements;
                    ((TableWriterSeq*)cself)->qual_buf = p;
                }
            }
            if( buf_ready ) {
                /* map every quality value through the discretization table;
                   positions flagged in no_quantize_mask keep their value
                   (limited to the low 6 bits) */
                if (data->no_quantize_mask.buffer) {
                    bool const *mask = data->no_quantize_mask.buffer;

                    for(i = 0; i < data->quality.elements; i++ ) {
                        uint8_t const q = b[i] & 0x3F;
                        cself->qual_buf[i] = mask[i] ? q : cself->discrete_qual[q];
                    }
                } else {
                    for(i = 0; i < data->quality.elements; i++ ) {
                        cself->qual_buf[i] = cself->discrete_qual[b[i]];
                    }
                }
                if (cself->options & ewseq_co_SaveQual) {
                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], cself->qual_buf, data->quality.elements);
                } else {
                    /* store quantized qualities only for reads that have no alignment */
                    uint32_t r;
                    uint8_t const *const qual = cself->qual_buf;
                    uint8_t const *const aligned = data->alignment_count.buffer;
                    INSDC_coord_zero const *const rs = data->read_start.buffer;
                    INSDC_coord_len const *const rl = data->read_len.buffer;

                    TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], NULL, 0);
                    for (r = 0; r < data->nreads; ++r) {
                        if (!aligned[r]) {
                            INSDC_coord_zero const readStart = rs[r];
                            INSDC_coord_len const readLen = rl[r];

                            TW_COL_WRITE_BUF(cself->base, cself->cols[ewseq_cn_QUALITY], &qual[readStart], readLen);
                        }
                    }
                }
            }
        }
        if( !(cself->options & ewseq_co_NoLabelData) ) {
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL], data->label);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL_START], data->label_start);
            TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_LABEL_LEN], data->label_len);
        }
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_TYPE], data->read_type);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_START], data->read_start);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_LEN], data->read_len);
        TW_COL_WRITE_VAR(cself->base, cself->cols[ewseq_cn_TMP_KEY_ID], data->tmp_key_id);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_SPOT_GROUP], data->spot_group);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_CSKEY], data->cskey);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_READ_FILTER], data->read_filter);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_PLATFORM], data->platform);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_TI], data->ti);
        TW_COL_WRITE(cself->base, cself->cols[ewseq_cn_NAME], data->spot_name);
        if( rc == 0 ) {
            rc = TableWriter_CloseRow(cself->base);
        }
    }
    return rc;
}