/* Returns the row count reported by VCursorIdRange for the first non-static
 * column of the table, or 0 if any preparatory step fails.
 *
 * tab : table to inspect
 *
 * A temporary column-definition list and a temporary read cursor are created
 * and released/destroyed again before returning.
 */
static uint64_t get_rowcount( const VTable * tab )
{
    uint64_t row_count = 0;
    col_defs * columns;

    if ( !vdcd_init( &columns, 1024 ) )
        return row_count;

    if ( vdcd_extract_from_table( columns, tab ) > 0 )
    {
        const VCursor * cursor;
        rc_t rc = VTableCreateCursorRead( tab, &cursor );
        if ( rc == 0 )
        {
            if ( vdcd_add_to_cursor( columns, cursor ) )
            {
                rc = VCursorOpen( cursor );
                if ( rc == 0 )
                {
                    uint32_t col_idx;
                    if ( vdcd_get_first_none_static_column_idx( columns, cursor, &col_idx ) )
                    {
                        /* the first row id is required by the API but not needed here */
                        int64_t first_row;
                        rc = VCursorIdRange( cursor, col_idx, &first_row, &row_count );
                    }
                }
            }
            /* the cursor is released whether or not it could be opened */
            VCursorRelease( cursor );
        }
    }

    vdcd_destroy( columns );
    return row_count;
}
/* Detect the spot-id range of the table through a temporary read cursor.
 *
 * A cursor with the single column "READ" is created and opened, its id range
 * is queried, and on success min_spot_id, max_spot_id and spot_count of
 * 'self' are filled in. The temporary cursor is released before returning.
 *
 * NOTE(review): max_spot_id is stored as first + count; if VCursorIdRange
 * reports a half-open range this is one past the last id - confirm that the
 * users of max_spot_id expect that.
 *
 * Returns 0 on success or the rc of the first failing call.
 */
static rc_t SRATableGetMinMax( SRATable * self )
{
    const VCursor * probe;
    rc_t rc;

    assert( self != NULL );
    assert( self->vtbl != NULL);

    rc = VTableCreateCursorRead( self->vtbl, &probe );
    if ( rc != 0 )
        return rc;

    {
        uint32_t read_idx;
        rc = VCursorAddColumn ( probe, &read_idx, "READ" );
        if ( rc == 0 )
            rc = VCursorOpen( probe );
        if ( rc == 0 )
        {
            int64_t first_row;
            uint64_t row_count;
            /* column index 0 is passed here rather than read_idx */
            rc = VCursorIdRange( probe, 0, &first_row, &row_count );
            if ( rc == 0 )
            {
                self->min_spot_id = first_row;
                self->max_spot_id = first_row + row_count;
                self->spot_count = row_count;
            }
        }
    }

    VCursorRelease( probe );
    return rc;
}
/// Query the row id range of a column of this cursor.
///
/// @param column  column index passed through to VCursorIdRange
/// @return        pair whose .first/.second receive the two output values of
///                VCursorIdRange (first row id and row count)
/// @throws CSraException (eInitFailed) when VCursorIdRange returns non-zero
TVDBRowIdRange CVDBCursor::GetRowIdRange(TVDBColumnIdx column) const
{
    TVDBRowIdRange range;
    const rc_t rc = VCursorIdRange(*this, column, &range.first, &range.second);
    if ( rc != 0 ) {
        NCBI_THROW2_FMT(CSraException, eInitFailed,
                        "Cannot get VDB cursor row range: "<<*this<<": "<<column, rc);
    }
    return range;
}
/* Dump one table in FASTQ or FASTA format.
 *
 * ctx  : dump context (row generator, output format, max. line length)
 * tbl  : table to dump
 * fctx : fastq context; its cursor is prepared here and released before
 *        returning (only if preparing it succeeded)
 *
 * The row range of the READ column is used as the default range if the user
 * gave none; otherwise the user's range is checked against it. If no range
 * remains, an "empty range" rc is returned.
 */
static rc_t vdb_fastq_tbl( const p_dump_context ctx, const VTable * tbl, fastq_ctx * fctx )
{
    int64_t first_row;
    uint64_t row_count;
    rc_t rc = vdb_prepare_cursor( ctx, tbl, fctx );
    DISP_RC( rc, "the table lacks READ and/or QUALITY column" );
    if ( rc != 0 )
        return rc;

    rc = VCursorIdRange( fctx->cursor, fctx->idx_read, &first_row, &row_count );
    DISP_RC( rc, "VCursorIdRange() failed" );
    if ( rc == 0 )
    {
        if ( vdn_range_defined( ctx->row_generator ) == false )
        {
            /* no user supplied row-range: take all rows */
            vdn_set_range( ctx->row_generator, first_row, row_count );
        }
        else
        {
            /* user supplied row-range: check it against the boundaries */
            vdn_check_range( ctx->row_generator, first_row, row_count );
        }

        if ( !vdn_range_defined( ctx->row_generator ) )
        {
            rc = RC( rcExe, rcDatabase, rcReading, rcRange, rcEmpty );
        }
        else if ( ctx->format == df_fastq )
        {
            if ( fctx->idx_name == INVALID_COLUMN )
                rc = vdb_fastq_loop_without_name( ctx, fctx );
            else
                rc = vdb_fastq_loop_with_name( ctx, fctx );
        }
        else if ( ctx->format == df_fasta )
        {
            /* fall back to the default line length if none was requested */
            if ( ctx->max_line_len == 0 )
                ctx->max_line_len = DEF_FASTA_LEN;

            if ( fctx->idx_name == INVALID_COLUMN )
                rc = vdb_fasta_loop_without_name( ctx, fctx );
            else
                rc = vdb_fasta_loop_with_name( ctx, fctx );
        }
    }

    VCursorRelease( fctx->cursor );
    return rc;
}
/**
 * Verify, for every row of the name column, that the row range produced for
 * that name matches the range expected by the test case.
 *
 * For each row the name is read, set as the cursor parameter "QUERY_NAME",
 * and the cell of the name-range column is compared against
 * test_case.key_ranges[name].
 *
 * @param test_case       expected name -> row-range mapping (key_ranges)
 * @param cursor          opened cursor containing both columns
 * @param name_idx        index of the name column
 * @param name_range_idx  index of the row-range column
 * @return 0 if all rows match; the failing rc of a VDB call; or 1 when a name
 *         is unexpected or its range differs (both cases are logged)
 */
rc_t runChecks(const TestCase& test_case, const VCursor * cursor, uint32_t name_idx, uint32_t name_range_idx)
{
    int64_t first_id;
    uint64_t count_id;

    rc_t rc = VCursorIdRange( cursor, name_idx, &first_id, &count_id );
    if (rc != 0)
    {
        LOGERR( klogInt, rc, "VCursorIdRange() failed" );
        return rc;
    }

    /* Iterate by offset: row ids are signed, the count is unsigned. Comparing
       an unsigned row id against first_id + count_id skips every row when
       first_id is negative. */
    for (uint64_t offset = 0; offset < count_id; ++offset)
    {
        const int64_t row_id = first_id + static_cast<int64_t>(offset);
        const char * name = NULL;
        uint32_t name_len;
        RowRange *row_range;

        rc = VCursorCellDataDirect( cursor, row_id, name_idx, NULL, (void const **)&name, NULL, &name_len );
        if ( rc != 0 )
            return rc;

        /* the name-range cell is read only after QUERY_NAME has been set */
        rc = VCursorParamsSet( ( struct VCursorParams const * )cursor, "QUERY_NAME", "%.*s", name_len, name );
        if ( rc != 0 )
            return rc;

        rc = VCursorCellDataDirect( cursor, row_id, name_range_idx, NULL, (void const **)&row_range, NULL, NULL );
        if ( rc != 0 )
            return rc;

        std::string name_str(name, name_len);

        if (test_case.key_ranges.find(name_str) == test_case.key_ranges.end())
        {
            PLOGMSG( klogInt, (klogErr, "Unexpected name '$(NAME)' in test case '$(TC_NAME)'", "TC_NAME=%s,NAME=%s", test_case_name, name_str.c_str()) );
            return 1;
        }

        const RowRange & row_range_exp = test_case.key_ranges.find(name_str)->second;
        if (row_range->start_id != row_range_exp.start_id || row_range->stop_id != row_range_exp.stop_id)
        {
            PLOGMSG( klogInt, (klogErr, "Row range for name '$(NAME)' in test case '$(TC_NAME)' does not match. Expected: $(EXP_S)-$(EXP_F), actual: $(ACT_S)-$(ACT_F)", "TC_NAME=%s,NAME=%s,EXP_S=%ld,EXP_F=%ld,ACT_S=%ld,ACT_F=%ld", test_case_name, name_str.c_str(), row_range_exp.start_id, row_range_exp.stop_id, row_range->start_id, row_range->stop_id) );
            return 1;
        }
    }

    return rc;
}
/* copy_table
 *  copy every row of the source cursor into the destination cursor
 *
 *  pb         : copy parameters, passed through to copy_row
 *  dcurs      : destination (writable) cursor, opened here
 *  scurs      : source cursor, opened here
 *  cm         : column map, passed through to copy_row
 *  count      : number of entries in "cm"
 *  rdfilt_idx : passed through to copy_row
 *
 *  Returns 0 on success; otherwise the rc of the first failing step, which
 *  is also logged (copy_row / commit failures are returned without logging
 *  here).
 */
static rc_t copy_table ( const vtblcp_parms *pb, VCursor *dcurs, const VCursor *scurs, const vtblcp_column_map *cm, uint32_t count, uint32_t rdfilt_idx )
{
    /* open source */
    rc_t rc = VCursorOpen ( scurs );
    if ( rc != 0 )
        LOGERR ( klogErr, rc, "failed to open source cursor" );
    else
    {
        /* get row range */
        int64_t row;
        uint64_t range_count;

        rc = VCursorIdRange ( scurs, 0, & row, & range_count );
        if ( rc != 0 )
            LOGERR ( klogInt, rc, "failed to determine row range for source cursor" );
        else
        {
            /* one past the last row id; computed only after the range query
               succeeded, so the outputs are known to be set */
            const int64_t end = row + range_count;

            /* open destination cursor */
            rc = VCursorOpen ( dcurs );
            if ( rc != 0 )
                LOGERR ( klogErr, rc, "failed to open destination cursor" );
            else
            {
                /* focus destination on initial source row */
                rc = VCursorSetRowId ( dcurs, row );
                if ( rc != 0 )
                    PLOGERR ( klogErr, (klogErr, rc, "failed to set destination cursor row to id '$(row)'", "row=%" LD64, row ));
                else
                {
                    /* copy each row of [ first, first + count ) -
                       "row <= first + count" would touch one row too many */
                    for ( ; row < end; ++ row )
                    {
                        rc = copy_row ( pb, dcurs, scurs, cm, count, rdfilt_idx, row );
                        if ( rc != 0 )
                            break;
                    }

                    /* commit changes */
                    if ( rc == 0 )
                        rc = VCursorCommit ( dcurs );
                }
            }
        }
    }

    return rc;
}
/* Open * open cursor, resolving schema * for the set of opened columns * * NB - there is no corresponding "Close" * use "Release" instead. */ rc_t VTableReadCursorOpen ( const VTableCursor *cself ) { rc_t rc; VTableCursor *self = ( VTableCursor* ) cself; if ( self == NULL ) rc = RC ( rcVDB, rcCursor, rcOpening, rcSelf, rcNull ); else { VLinker *ld = self -> tbl -> linker; KDlset *libs; rc = VLinkerOpen ( ld, & libs ); if ( rc == 0 ) { rc = VCursorOpenRead ( self, libs ); if ( rc == 0 ) { int64_t first; uint64_t count; rc = VCursorIdRange ( & self -> dad, 0, & first, & count ); if ( rc != 0 ) { /* permit empty open when run from sradb */ if ( GetRCState ( rc ) == rcEmpty && GetRCObject ( rc ) == rcRange && self -> permit_add_column && VectorLength ( & self -> dad . row ) == 0 ) { rc = 0; } } else if ( count != 0 ) { /* set initial row id to starting row */ self -> dad . start_id = self -> dad . end_id = self -> dad . row_id = first; } if ( rc != 0 ) self -> dad . state = vcFailed; } KDlsetRelease ( libs ); } } return rc; }
/* Check the requested row set (cg_ctx->rows) against the id range of the
 * SEQ-cursor.
 *
 * Returns 0 on success; a failure of either step is logged and its rc
 * returned.
 */
static rc_t cg_dump_adjust_rowrange( cg_dump_ctx * cg_ctx )
{
    int64_t first_row;
    uint64_t row_count;
    rc_t rc = VCursorIdRange( cg_ctx->seq_cur, 0, &first_row, &row_count );

    if ( rc == 0 )
    {
        rc = num_gen_range_check( ( num_gen * )cg_ctx->rows, first_row, row_count );
        if ( rc != 0 )
        {
            (void)LOGERR( klogErr, rc, "cannot define range of rows" );
        }
    }
    else
    {
        (void)LOGERR( klogErr, rc, "cannot detect Id-Range for SEQ-cursor" );
    }

    return rc;
}
/* Touch every cell of the given columns.
 *
 * curs : opened cursor
 * ncol : number of entries in cid
 * cid  : column indices to read
 *
 * The union of the id ranges of all columns is computed first, then
 * VCursorCellDataDirect is called for every (row, column) pair of that
 * union; the fetched data itself is discarded.
 *
 * Returns 0 on success or the rc of the first failing call. With ncol == 0
 * nothing is read and 0 is returned.
 */
static rc_t fetch_all_rows(const VCursor *curs, unsigned ncol, const uint32_t cid[/* ncol */])
{
    /* initialised so that ncol == 0 yields an empty range instead of reading
       indeterminate values in the row loop below */
    int64_t start = 0;
    int64_t stop = 0;
    int64_t row;
    unsigned i;
    rc_t rc;

    for (i = 0; i != ncol; ++i) {
        int64_t cstart;
        uint64_t ccount;

        rc = VCursorIdRange(curs, cid[i], &cstart, &ccount);
        if (rc) return rc;

        if (i == 0) {
            start = cstart;
            stop = cstart + ccount;
        }
        else {
            /* widen [start, stop) to include this column's range */
            if (start > cstart)
                start = cstart;
            if (stop < cstart + ccount)
                stop = cstart + ccount;
        }
    }

    for (row = start; row != stop; ++row) {
        for (i = 0; i != ncol; ++i) {
            uint32_t elem_bits;
            const void *base;
            uint32_t offset;
            uint32_t length;

            rc = VCursorCellDataDirect(curs, row, cid[i], &elem_bits, &base, &offset, &length);
            if (rc) return rc;
        }
    }
    return 0;
}
/* Initialise a SpotIterator.
 *
 * redactFileName : file name stored in self->filename and opened for reading
 * db             : supplies the cursor and column index used to query the
 *                  id range
 * self           : iterator to initialise; it is zeroed first
 *
 * Steps: query the id range (the count goes to self->maxSpotId), initialise
 * the iterator directory, open the file, then read the first spot to redact.
 * Returns 0 on success or the rc of the first failing step.
 */
static rc_t SpotIteratorInit(const char* redactFileName, const Db* db, SpotIterator* self)
{
    int64_t firstRow = 0;
    rc_t rc = 0;

    assert(self && db);

    memset(self, 0, sizeof *self);
    self->crnSpotId = 1;

    rc = VCursorIdRange (db->rCursor, db->rFilterIdx, &firstRow, &self->maxSpotId);
    DISP_RC(rc, "while calling VCursorIdRange");

    /* assigned even when the range query failed (firstRow is then still 0
       unless the callee wrote to it) */
    self->spotToReduct = firstRow - 1;
    if (rc != 0)
        return rc;

    rc = SpotIteratorInitDirectory();
    if (rc != 0)
        return rc;

    self->filename = redactFileName;
    rc = KDirectoryOpenFileRead (__SpotIteratorDirectory, &self->file, "%s", self->filename);
    if (rc != 0) {
        PLOGERR(klogErr, (klogErr, rc, "while opening file '$(path)'", "path=%s", self->filename));
        return rc;
    }

    return SpotIteratorReadSpotToRedact(self);
}
/* Load every row of the "OBJECTS" table of 'db' into 'data'.
 *
 * For each row the columns id, name, project, display_name, size,
 * encryption_key and checksum are read and handed to
 * KeyRingDataInsertObject. Nothing is read when HasData() reports false for
 * the table.
 *
 * Returns 0 on success; otherwise the first error encountered. A failure of
 * releasing the cursor or table is reported only if nothing failed before.
 */
static rc_t LoadObjects ( ObjectTable* data, const VDatabase* db )
{
    const VTable* table;
    rc_t rc = VDatabaseOpenTableRead(db, &table, "OBJECTS");
    if (rc != 0)
        return rc;

    {
        const VCursor* cursor;
        rc_t release_rc;

        rc = VTableCreateCachedCursorRead( table, &cursor, CursorCacheSize );
        if (rc == 0)
        {
            uint32_t id_idx, name_idx, proj_idx, dname_idx, size_idx, csum_idx, enc_idx;

            rc = VCursorAddColumn( cursor, &id_idx, "id" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &name_idx, "name" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &proj_idx, "project" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &dname_idx, "display_name" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &size_idx, "size" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &csum_idx, "checksum" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &enc_idx, "encryption_key" );

            if (rc == 0 && HasData(table))
            {
                int64_t first_row;
                uint64_t row_count;

                rc = VCursorOpen( cursor );
                if (rc == 0)
                    rc = VCursorIdRange( cursor, 0, &first_row, &row_count );

                if (rc == 0)
                {
                    uint64_t offset;
                    for (offset = 0; offset < row_count; ++offset)
                    {
                        const void* cell;
                        uint32_t elem_count;
                        uint32_t id;
                        uint64_t size;
                        String name;
                        String project;
                        String display_name;
                        String checksum;
                        String encryption_key;

                        /* the first failing step ends the loop with its rc */
                        rc = VCursorSetRowId(cursor, first_row + offset);
                        if (rc != 0) break;
                        rc = VCursorOpenRow( cursor );
                        if (rc != 0) break;

                        rc = VCursorCellData( cursor, id_idx, NULL, &cell, NULL, NULL);
                        if (rc != 0) break;
                        id = *(uint32_t*)cell;

                        rc = VCursorCellData( cursor, name_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&name, (const char*)cell, elem_count, elem_count);

                        rc = VCursorCellData( cursor, proj_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&project, (const char*)cell, elem_count, elem_count);

                        rc = VCursorCellData( cursor, dname_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&display_name, (const char*)cell, elem_count, elem_count);

                        rc = VCursorCellData( cursor, size_idx, NULL, &cell, NULL, NULL);
                        if (rc != 0) break;
                        /* NOTE(review): the cell is read as a 32-bit value into a
                           64-bit local - confirm the width of the "size" column */
                        size = *(uint32_t*)cell;

                        rc = VCursorCellData( cursor, enc_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&encryption_key, (const char*)cell, elem_count, elem_count);

                        rc = VCursorCellData( cursor, csum_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&checksum, (const char*)cell, elem_count, elem_count);

                        rc = KeyRingDataInsertObject(data, id, &name, &project, &display_name, size, &checksum, &encryption_key);
                        if (rc != 0) break;

                        rc = VCursorCloseRow( cursor );
                        if (rc != 0) break;
                    }
                }
            }

            release_rc = VCursorRelease(cursor);
            if (rc == 0)
                rc = release_rc;
        }

        release_rc = VTableRelease(table);
        if (rc == 0)
            rc = release_rc;
    }

    return rc;
}
/**
 * Run consistency checks over the PRIMARY_ALIGNMENT, SECONDARY_ALIGNMENT and
 * SEQUENCE cursors of one accession.
 *
 * The function returns nothing: it returns normally (after logging a
 * "looks good" message) when all checks pass, and throws otherwise
 * (VDB_ERROR / VDB_ROW_ERROR when a VDB call fails or yields unexpected
 * data, DATA_ERROR when a data inconsistency is found).
 *
 * @param accession   name used in log messages
 * @param config      thresholds / cutoffs (percent or absolute number) that
 *                    limit how many rows are examined and how many
 *                    "PA longer than SA" rows are tolerated
 * @param pa_cursor   cursor on PRIMARY_ALIGNMENT; columns are added and the
 *                    cursor is opened here
 * @param sa_cursor   cursor on SECONDARY_ALIGNMENT; same treatment
 * @param seq_cursor  cursor on SEQUENCE; same treatment
 */
void runChecks ( const char * accession, const CheckCorruptConfig * config, const VCursor * pa_cursor, const VCursor * sa_cursor, const VCursor * seq_cursor )
{
    rc_t rc;
    uint32_t pa_has_ref_offset_idx;
    uint32_t sa_has_ref_offset_idx;
    uint32_t sa_seq_spot_id_idx;
    uint32_t sa_seq_read_id_idx;
    uint32_t sa_pa_id_idx;
    uint32_t sa_tmp_mismatch_idx;
    uint32_t seq_pa_id_idx;
    uint32_t seq_read_len_idx;
    uint32_t seq_cmp_read_idx;
    bool has_tmp_mismatch;

    /* add columns to cursor */
    /* tbl_name and col_spec must be string literals: they are pasted into the
       error message by literal concatenation */
#define add_column(tbl_name, cursor, idx, col_spec) \
    rc = VCursorAddColumn( cursor, &idx, col_spec ); \
    if ( rc != 0 ) \
        throw VDB_ERROR("VCursorAddColumn() failed for " tbl_name " table, " col_spec " column", rc);

    add_column( "PRIMARY_ALIGNMENT", pa_cursor, pa_has_ref_offset_idx, "(bool)HAS_REF_OFFSET" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_has_ref_offset_idx, "(bool)HAS_REF_OFFSET" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_seq_spot_id_idx, "SEQ_SPOT_ID" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_seq_read_id_idx, "SEQ_READ_ID" );
    add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_pa_id_idx, "PRIMARY_ALIGNMENT_ID" );
    add_column( "SEQUENCE", seq_cursor, seq_pa_id_idx, "PRIMARY_ALIGNMENT_ID" );
    add_column( "SEQUENCE", seq_cursor, seq_read_len_idx, "READ_LEN" );
    add_column( "SEQUENCE", seq_cursor, seq_cmp_read_idx, "CMP_READ" );

    // optional columns
    // failing to add TMP_MISMATCH is not an error; its check is skipped
    rc = VCursorAddColumn( sa_cursor, &sa_tmp_mismatch_idx, "TMP_MISMATCH" );
    if ( rc == 0 )
        has_tmp_mismatch = true;
    else
    {
        has_tmp_mismatch = false;
        rc = 0;
    }

#undef add_column

    rc = VCursorOpen( pa_cursor );
    if (rc != 0)
        throw VDB_ERROR("VCursorOpen() failed for PRIMARY_ALIGNMENT table", rc);
    rc = VCursorOpen( sa_cursor );
    if (rc != 0)
        throw VDB_ERROR("VCursorOpen() failed for SECONDARY_ALIGNMENT table", rc);
    rc = VCursorOpen( seq_cursor );
    if (rc != 0)
        throw VDB_ERROR("VCursorOpen() failed for SEQUENCE table", rc);

    int64_t sa_id_first;
    uint64_t sa_row_count;

    rc = VCursorIdRange( sa_cursor, sa_pa_id_idx, &sa_id_first, &sa_row_count );
    if (rc != 0)
        throw VDB_ERROR("VCursorIdRange() failed for SECONDARY_ALIGNMENT table, PRIMARY_ALIGNMENT_ID column", rc);

    bool reported_about_no_pa = false;
    uint64_t pa_longer_sa_rows = 0;

    // number of "PA longer than SA" rows at which the check fails:
    // a positive percent setting wins over the absolute number; an absolute
    // number of 0, or one above the row count, means "row count"
    uint64_t pa_longer_sa_limit;
    if (config->pa_len_threshold_percent > 0)
        pa_longer_sa_limit = ceil( config->pa_len_threshold_percent * sa_row_count );
    else if (config->pa_len_threshold_number == 0 || config->pa_len_threshold_number > sa_row_count)
        pa_longer_sa_limit = sa_row_count;
    else
        pa_longer_sa_limit = config->pa_len_threshold_number;

    // number of SECONDARY_ALIGNMENT rows to examine, same selection rules
    uint64_t sa_row_limit;
    if (config->sa_cutoff_percent > 0)
        sa_row_limit = ceil( config->sa_cutoff_percent * sa_row_count );
    else if (config->sa_cutoff_number == 0 || config->sa_cutoff_number > sa_row_count)
        sa_row_limit = sa_row_count;
    else
        sa_row_limit = config->sa_cutoff_number;

    // pass 1: walk SECONDARY_ALIGNMENT rows
    for ( uint64_t i = 0; i < sa_row_count && i < sa_row_limit; ++i )
    {
        int64_t sa_row_id = i + sa_id_first;
        const void * data_ptr = NULL;
        uint32_t data_len;
        uint32_t pa_row_len;
        uint32_t sa_row_len;
        uint32_t seq_read_len_len;

        // SA:HAS_REF_OFFSET
        // only the cell length (sa_row_len) is used below, not the data
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_has_ref_offset_idx, NULL, (const void**)&data_ptr, NULL, &sa_row_len );
        if ( rc != 0 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, HAS_REF_OFFSET column", sa_row_id, rc);

        const int64_t * p_seq_spot_id;
        uint32_t seq_spot_id_len;
        // SA:SEQ_SPOT_ID
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_seq_spot_id_idx, NULL, (const void**)&p_seq_spot_id, NULL, &seq_spot_id_len );
        if ( rc != 0 || p_seq_spot_id == NULL || seq_spot_id_len != 1 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_SPOT_ID column", sa_row_id, rc);

        int64_t seq_spot_id = *p_seq_spot_id;
        if (seq_spot_id == 0)
        {
            std::stringstream ss;
            ss << "SECONDARY_ALIGNMENT:" << sa_row_id << " has SEQ_SPOT_ID = " << seq_spot_id;

            throw DATA_ERROR(ss.str());
        }

        if ( has_tmp_mismatch )
        {
            const char * p_sa_tmp_mismatch;
            // SA:TMP_MISMATCH
            rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_tmp_mismatch_idx, NULL, (const void**)&p_sa_tmp_mismatch, NULL, &data_len );
            if ( rc != 0 || p_sa_tmp_mismatch == NULL )
                throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, TMP_MISMATCH column", sa_row_id, rc);

            // a '=' anywhere in the cell is treated as a data error
            for ( uint32_t j = 0; j < data_len; ++j )
            {
                if ( p_sa_tmp_mismatch[j] == '=' )
                {
                    std::stringstream ss;
                    ss << "SECONDARY_ALIGNMENT:" << sa_row_id << " TMP_MISMATCH contains '='";

                    throw DATA_ERROR(ss.str());
                }
            }
        }

        const int64_t * p_pa_row_id;
        // SA:PRIMARY_ALIGNMENT_ID
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_pa_id_idx, NULL, (const void**)&p_pa_row_id, NULL, &data_len );
        if ( rc != 0 || p_pa_row_id == NULL || data_len != 1 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, PRIMARY_ALIGNMENT_ID column", sa_row_id, rc);

        int64_t pa_row_id = *p_pa_row_id;
        if (pa_row_id == 0)
        {
            // reported once per call, then such rows are skipped silently
            if (!reported_about_no_pa)
            {
                PLOGMSG (klogInfo, (klogInfo, "$(ACC) has secondary alignments without primary", "ACC=%s", accession));
                reported_about_no_pa = true;
            }
            continue;
        }

        // PA:HAS_REF_OFFSET
        rc = VCursorCellDataDirect ( pa_cursor, pa_row_id, pa_has_ref_offset_idx, NULL, &data_ptr, NULL, &pa_row_len );
        if ( rc != 0 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on PRIMARY_ALIGNMENT table, HAS_REF_OFFSET column", pa_row_id, rc);

        // move on when PA.len equal to SA.len
        if (pa_row_len == sa_row_len)
            continue;

        if (pa_row_len < sa_row_len)
        {
            std::stringstream ss;
            ss << "PRIMARY_ALIGNMENT:" << pa_row_id << " HAS_REF_OFFSET length (" << pa_row_len << ") less than SECONDARY_ALIGNMENT:" << sa_row_id << " HAS_REF_OFFSET length (" << sa_row_len << ")";

            throw DATA_ERROR(ss.str());
        }

        // we already know that pa_row_len > sa_row_len
        ++pa_longer_sa_rows;

        const int32_t * p_seq_read_id;
        // SA:SEQ_READ_ID
        rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_seq_read_id_idx, NULL, (const void**)&p_seq_read_id, NULL, &data_len );
        if ( rc != 0 || p_seq_read_id == NULL || data_len != 1 )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_READ_ID column", sa_row_id, rc);

        // one-based read index
        int32_t seq_read_id = *p_seq_read_id;

        const uint32_t * p_seq_read_len;
        // SEQ:READ_LEN
        // the SEQUENCE row is addressed by the spot id read from the SA row
        rc = VCursorCellDataDirect ( seq_cursor, seq_spot_id, seq_read_len_idx, NULL, (const void**)&p_seq_read_len, NULL, &seq_read_len_len );
        if ( rc != 0 || p_seq_read_len == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, READ_LEN column", seq_spot_id, rc);

        if ( seq_read_id < 1 || (uint32_t)seq_read_id > seq_read_len_len )
        {
            std::stringstream ss;
            ss << "SECONDARY:" << sa_row_id << " SEQ_READ_ID value (" << seq_read_id << ") - 1 based, is out of SEQUENCE:" << seq_spot_id << " READ_LEN range (" << seq_read_len_len << ")";

            throw DATA_ERROR(ss.str());
        }

        if (pa_row_len != p_seq_read_len[seq_read_id - 1])
        {
            std::stringstream ss;
            ss << "PRIMARY_ALIGNMENT:" << pa_row_id << " HAS_REF_OFFSET length (" << pa_row_len << ") does not match its SEQUENCE:" << seq_spot_id << " READ_LEN[" << seq_read_id - 1 << "] value (" << p_seq_read_len[seq_read_id - 1] << ")";

            throw DATA_ERROR(ss.str());
        }

        // checked last, so the more specific errors above are reported first
        if (pa_longer_sa_rows >= pa_longer_sa_limit)
        {
            std::stringstream ss;
            ss << "Limit violation (pa_longer_sa): there are at least " << pa_longer_sa_rows << " alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT";

            throw DATA_ERROR(ss.str());
        }
    }

    int64_t seq_id_first;
    uint64_t seq_row_count;

    rc = VCursorIdRange( seq_cursor, seq_pa_id_idx, &seq_id_first, &seq_row_count );
    if (rc != 0)
        throw VDB_ERROR("VCursorIdRange() failed for SEQUENCE table, PRIMARY_ALIGNMENT_ID column", rc);

    // number of SEQUENCE rows to examine, same selection rules as above
    uint64_t seq_row_limit;
    if (config->seq_cutoff_percent > 0)
        seq_row_limit = ceil( config->seq_cutoff_percent * seq_row_count );
    else if (config->seq_cutoff_number == 0 || config->seq_cutoff_number > seq_row_count)
        seq_row_limit = seq_row_count;
    else
        seq_row_limit = config->seq_cutoff_number;

    // pass 2: walk SEQUENCE rows
    for ( uint64_t i = 0; i < seq_row_count && i < seq_row_limit; ++i )
    {
        int64_t seq_row_id = i + seq_id_first;
        const void * data_ptr = NULL;
        uint32_t data_len;

        const int64_t * p_seq_pa_id;
        uint32_t seq_pa_id_len;
        // SEQ:PRIMARY_ALIGNMENT_ID
        rc = VCursorCellDataDirect ( seq_cursor, seq_row_id, seq_pa_id_idx, NULL, (const void**)&p_seq_pa_id, NULL, &seq_pa_id_len );
        if ( rc != 0 || p_seq_pa_id == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, PRIMARY_ALIGNMENT_ID column", seq_row_id, rc);

        const uint32_t * p_seq_read_len;
        // SEQ:READ_LEN
        rc = VCursorCellDataDirect ( seq_cursor, seq_row_id, seq_read_len_idx, NULL, (const void**)&p_seq_read_len, NULL, &data_len );
        if ( rc != 0 || p_seq_read_len == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, READ_LEN column", seq_row_id, rc);

        if ( seq_pa_id_len != data_len )
        {
            std::stringstream ss;
            ss << "SEQUENCE:" << seq_row_id << " PRIMARY_ALIGNMENT_ID length (" << seq_pa_id_len << ") does not match SEQUENCE:" << seq_row_id << " READ_LEN length (" << data_len << ")";

            throw DATA_ERROR(ss.str());
        }

        // sum READ_LEN over the reads whose PRIMARY_ALIGNMENT_ID is 0
        uint64_t sum_unaligned_read_len = 0;
        for ( uint32_t j = 0; j < seq_pa_id_len; ++j )
        {
            if ( p_seq_pa_id[j] == 0 )
            {
                sum_unaligned_read_len += p_seq_read_len[j];
            }
        }

        // SEQ:CMP_READ
        rc = VCursorCellDataDirect ( seq_cursor, seq_row_id, seq_cmp_read_idx, NULL, (const void**)&data_ptr, NULL, &data_len );
        if ( rc != 0 || data_ptr == NULL )
            throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, SEQ:CMP_READ column", seq_row_id, rc);

        // the CMP_READ cell length must equal that sum
        if ( sum_unaligned_read_len != data_len )
        {
            std::stringstream ss;
            ss << "SEQUENCE:" << seq_row_id << " CMP_READ length (" << data_len << ") does not match sum of unaligned READ_LEN values (" << sum_unaligned_read_len << ")";

            throw DATA_ERROR(ss.str());
        }
    }

    // mention the cutoffs in the success message when not all rows were examined
    if (sa_row_limit < sa_row_count || seq_row_limit < seq_row_count)
        PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good (based on first $(SA_CUTOFF) of SECONDARY_ALIGNMENT and $(SEQ_CUTOFF) SEQUENCE rows)", "ACC=%s,SA_CUTOFF=%lu,SEQ_CUTOFF=%lu", accession, sa_row_limit, seq_row_limit));
    else
        PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good", "ACC=%s", accession));
}
static rc_t process(const char* dbname) { rc_t rc; KHashFile* hf = NULL; rc = KHashFileMake(&hf, NULL); if (rc) { fprintf(stderr, "Couldn't create KHashFile\n"); return rc; } KDirectory* srcdir = NULL; rc = KDirectoryNativeDir(&srcdir); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } VDBManager* mgr = NULL; rc = VDBManagerMakeUpdate(&mgr, NULL); // NULL=No working directory if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } VDatabase* db = NULL; rc = VDBManagerOpenDBUpdate(mgr, &db, NULL, dbname); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } const VTable* tbl = NULL; rc = VDatabaseOpenTableRead(db, &tbl, "hdrs"); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } const VCursor* curs = NULL; rc = VTableCreateCursorRead(tbl, &curs); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } uint32_t group_idx = 0; // HDR, TAG, VALUE uint32_t hdr_idx = 0; uint32_t tag_idx = 0; uint32_t value_idx = 0; rc = VCursorAddColumn(curs, &group_idx, "GROUP"); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } rc = VCursorAddColumn(curs, &hdr_idx, "HDR"); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } rc = VCursorAddColumn(curs, &tag_idx, "TAG"); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } rc = VCursorAddColumn(curs, &value_idx, "VALUE"); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } rc = VCursorOpen(curs); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } int64_t start = 0; uint64_t count = 0; rc = VCursorIdRange(curs, 0, &start, &count); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } printf("start=%ld,count=%lu\n", start, count); while (count--) { uint64_t group; uint32_t row_len = 0; rc = VCursorReadDirect(curs, start, group_idx, 64, &group, 1, &row_len); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } printf("group=%lu, 
row_len=%d\n", group, row_len); char hdr[8]; rc = VCursorReadDirect(curs, start, hdr_idx, 8, &hdr, sizeof(hdr), &row_len); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } hdr[row_len] = '\0'; printf("hdr=%s, row_len=%d\n", hdr, row_len); char tag[8]; rc = VCursorReadDirect(curs, start, tag_idx, 8, &tag, sizeof(tag), &row_len); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } tag[row_len] = '\0'; printf("tag=%s, row_len=%d\n", tag, row_len); char value[8192]; rc = VCursorReadDirect(curs, start, value_idx, 8, &value, sizeof(value), &row_len); if (rc) { fprintf(stderr, "Failed %d %d", __LINE__, rc); return rc; } value[row_len] = '\0'; printf("value=%s, row_len=%d\n", value, row_len); if (!strcmp(hdr, "SQ") && !strcmp(tag, "SN")) { if (check_dup(hf, "SQ:SN", value)) { fprintf(stderr, "Duplicate SQ:SN value '%s'\n", value); } } if (!strcmp(hdr, "RG") && !strcmp(tag, "ID")) { if (check_dup(hf, "RG:ID", value)) { fprintf(stderr, "Duplicate RG:ID value '%s'\n", value); } } if (!strcmp(hdr, "PG") && !strcmp(tag, "ID")) { if (check_dup(hf, "PG:ID", value)) { fprintf(stderr, "Duplicate PG:ID value '%s'\n", value); } } start++; printf("\n"); } printf("Set has %lu elements\n", KHashFileCount(hf)); fprintf(stderr, "Made verifier for %s\n", dbname); KHashFileDispose(hf); VCursorRelease(curs); VTableRelease(tbl); VDatabaseRelease(db); VDBManagerRelease(mgr); KDirectoryRelease(srcdir); return 0; }
/* initialize_ref_pos
 *  prepare the pileup state for iterating over a reference slice
 *
 *  pileup_state       : state to fill in; reference_start_id, slice_start and
 *                       slice_length are read from it
 *  cursor_ref         : opened cursor on the REFERENCE table
 *  cursor_pa          : cursor on PRIMARY_ALIGNMENT, passed on to
 *                       add_ref_row_to_cache
 *  column_names_ref   : not used in this function
 *  column_index_ref   : REFERENCE column indices, addressed by COL_* constants
 *  column_count_ref   : not used in this function
 *  column_names_pa /
 *  column_index_pa /
 *  column_count_pa    : passed on to add_ref_row_to_cache
 *  error_buf /
 *  error_buf_size     : receives a message describing a failure
 *
 *  Reads the reference name and MAX_SEQ_LEN at reference_start_id, derives
 *  the slice's row id range, then walks the REFERENCE rows from up to 10 rows
 *  before the slice start up to the slice start, caching the alignments of
 *  each row through add_ref_row_to_cache.
 *
 *  Returns 0 on success, the rc of a failing VDB call, or (rc_t)(-1) when the
 *  REFERENCE table has no rows or no usable MAX_SEQ_LEN.
 */
rc_t initialize_ref_pos ( PileupIteratorState* pileup_state, VCursor const* cursor_ref, VCursor const* cursor_pa, char const* const* column_names_ref, uint32_t* column_index_ref, size_t column_count_ref, char const* const* column_names_pa, uint32_t* column_index_pa, size_t column_count_pa, char* error_buf, size_t error_buf_size )
{
    int64_t row_id = 0;
    uint64_t row_count = 0;
    uint32_t max_seq_len, row_len;

    rc_t rc = VCursorIdRange ( cursor_ref, 0, & row_id, & row_count );
    /* the outputs are only meaningful when the call succeeded */
    if ( rc != 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VCursorIdRange(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return rc;
    }

    /*printf ("REFERENCE table: row_id=%lld, row_count=%llu\n", row_id, row_count);*/

    if ( row_count < 1 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL, "There is no rows in REFERENCE table");
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return (rc_t)(-1);
    }

    pileup_state->total_row_count = row_count;

    /* We don't know the reference end id;
       use its name to notice the moment when it changes - this will be the end */
    rc = VCursorReadDirect ( cursor_ref, pileup_state->reference_start_id, column_index_ref [COL_NAME], sizeof (pileup_state->ref_name[0]) * 8, pileup_state->ref_name, countof(pileup_state->ref_name), & row_len );
    if ( rc != 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return rc;
    }
    pileup_state->ref_name[ min ( countof(pileup_state->ref_name) - 1, row_len) ] = '\0';

    /* Read MAX_SEQ_LEN from the start_row_id and assume that it's the same for all the rest */
    rc = VCursorReadDirect ( cursor_ref, pileup_state->reference_start_id, column_index_ref [COL_MAX_SEQ_LEN], sizeof (max_seq_len) * 8, & max_seq_len, 1, & row_len );
    if ( rc != 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return rc;
    }
    pileup_state->max_seq_len = max_seq_len;

    if ( row_len < 1 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL, "There is no MAX_SEQ_LEN column for row_id=%lld in REFERENCE table", row_id);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return (rc_t)(-1);
    }

    /* max_seq_len is used as a divisor below */
    if ( max_seq_len == 0 )
    {
        rc_t res = string_printf ( error_buf, error_buf_size, NULL, "MAX_SEQ_LEN is 0 for row_id=%lld in REFERENCE table", row_id);
        if (res == rcBuffer || res == rcInsufficient)
            error_buf [ error_buf_size - 1 ] = '\0';

        return (rc_t)(-1);
    }

    /* max_seq_len is 32 bits wide; cast to match the %lu conversion */
    printf ("MAX_SEQ_LEN=%lu\n", (unsigned long)max_seq_len);

    pileup_state->slice_start_id = pileup_state->reference_start_id + pileup_state->slice_start/max_seq_len;
    pileup_state->slice_end_id = pileup_state->slice_length != 0 ?
        pileup_state->reference_start_id + (pileup_state->slice_start + (int64_t)pileup_state->slice_length)/max_seq_len :
        (int64_t)pileup_state->total_row_count;

    printf ("slice position range: [%lld, %llu]\n", pileup_state->slice_start, pileup_state->slice_start + pileup_state->slice_length);
    /*printf ("slice id range: [%lld, %lld]\n", slice_start_id, slice_end_id);*/

    /* Read reference slice_start_id,
       read OVERLAP_*_POS to find out how many rows we need to read ahead of slice_start_id
       TODO: this is not implemented yet, instead we read just 10 rows ahead */

    /* Set cursor to <read_ahead_rows> rows ahead of slice_start_id
       and cache corresponding PRIMARY_ALIGNMENTS */
    {
        int64_t current_id = max (pileup_state->reference_start_id, pileup_state->slice_start_id - 10);
        int64_t stop_id = pileup_state->slice_start_id;
        uint32_t seq_start;
        uint32_t dummy;
#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS != 1
        int64_t const* alignment_ids;
#endif

        for (; ; ++current_id)
        {
            /* We don't know the current reference end_id;
               read its name and break when it changes */
            char ref_name[ countof (pileup_state->ref_name) ];
            rc = VCursorReadDirect ( cursor_ref, current_id, column_index_ref [COL_NAME], sizeof (ref_name[0]) * 8, ref_name, countof(ref_name), & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VCursorReadDirect(ref) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            ref_name[ min ( countof(ref_name) - 1, row_len) ] = '\0';

            /* the only exit from this loop besides the error returns */
            if ( current_id > stop_id || strcmp (ref_name, pileup_state->ref_name) )
                break;

#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS == 1
            rc = open_blob_for_current_id ( current_id, cursor_ref, & pileup_state->blob_alignment_ids, column_index_ref [COL_PRIMARY_ALIGNMENT_IDS], error_buf, error_buf_size );
            if (rc != 0)
                return rc;
#endif

            /* Read REFERENCE row's SEQ_START column to know the offset */
            rc = VCursorReadDirect ( cursor_ref, current_id, column_index_ref [COL_SEQ_START], sizeof (seq_start) * 8, & seq_start, 1, & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VCursorReadDirect(ref-seq_start) failed with error: 0x%08x (%u) [%R]", rc, rc, rc);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            pileup_state->current_seq_start = seq_start;

            /* Read REFERENCE row's PRIMARY_ALIGNMENT_IDS column to iterate through them */
            /* elem_bits = sizeof (*pileup_state->alignment_ids) * 8;*/
#if USE_SINGLE_BLOB_FOR_ALIGNMENT_IDS == 1
            rc = VBlobCellData ( pileup_state->blob_alignment_ids, current_id, & dummy, & pileup_state->alignment_ids, NULL, & row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VBlobCellData(ref-pa_ids) failed with error: 0x%08x (%u) [%R], row_len=%u", rc, rc, rc, row_len);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            pileup_state -> size_alignment_ids = row_len;
#else
            rc = VCursorCellDataDirect ( cursor_ref, current_id, column_index_ref [COL_PRIMARY_ALIGNMENT_IDS], NULL, (void const**)(& alignment_ids), 0, & row_len );
            /*rc = VCursorReadDirect ( cursor_ref, current_id, column_index_ref [COL_PRIMARY_ALIGNMENT_IDS], sizeof (*pileup_state->alignment_ids) * 8, pileup_state->alignment_ids, countof (pileup_state->alignment_ids), & row_len );*/
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: VCursorCellDataDirect(ref-pa_ids) failed with error: 0x%08x (%u) [%R], row_len=%u", rc, rc, rc, row_len);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
            rc = PileupIteratorState_SetAlignmentIds ( pileup_state, alignment_ids, row_len );
            if ( rc != 0 )
            {
                rc_t res = string_printf ( error_buf, error_buf_size, NULL, "ERROR: PileupIteratorState_SetAlignmentIds failed with error: 0x%08x (%u), row_len=%u", rc, rc, row_len);
                if (res == rcBuffer || res == rcInsufficient)
                    error_buf [ error_buf_size - 1 ] = '\0';

                return rc;
            }
#endif
            pileup_state->next_alignment_idx = 0;
            /*pileup_state->size_alignment_ids = row_len;*/

            /*printf ("Read %lu PRIMARY_ALIGNMENT_IDS for REFERENCE row_id=%lld:", row_len, current_id);*/
            {
                /*size_t i = 0;
                for (; i < row_len; ++i)
                    printf(" %lld", pa_ids [i]);*/
                /*printf ("\n");*/
            }

            /* For each PRIMARY_ALIGNMENT_ID in alignment_ids: read its start,
               length and cache it if it intersects the starting position */
            rc = add_ref_row_to_cache ( pileup_state, cursor_pa, seq_start, pileup_state->slice_start, pileup_state->alignment_ids, row_len, column_names_pa, column_index_pa, column_count_pa, error_buf, error_buf_size );
            if ( rc != 0 )
                return rc;
        }
    }

    return rc;
}
/* Load every row of the "PROJECTS" table of 'db' into 'data'.
 *
 * For each row the columns id, name, download_ticket and encryption_key are
 * read and handed to KeyRingDataInsertProject. Nothing is read when
 * HasData() reports false for the table.
 *
 * Returns 0 on success; otherwise the first error encountered. A failure of
 * releasing the cursor or table is reported only if nothing failed before.
 */
static rc_t LoadProjects( ProjectTable* data, const VDatabase* db )
{
    const VTable* table;
    rc_t rc = VDatabaseOpenTableRead(db, &table, "PROJECTS");
    if (rc != 0)
        return rc;

    {
        const VCursor* cursor;
        rc_t release_rc;

        rc = VTableCreateCachedCursorRead( table, &cursor, CursorCacheSize );
        if (rc == 0)
        {
            uint32_t id_idx, name_idx, dl_idx, enc_idx;

            rc = VCursorAddColumn( cursor, &id_idx, "id" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &name_idx, "name" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &dl_idx, "download_ticket" );
            if (rc == 0) rc = VCursorAddColumn( cursor, &enc_idx, "encryption_key" );

            if (rc == 0 && HasData(table))
            {
                int64_t first_row;
                uint64_t row_count;

                rc = VCursorOpen( cursor );
                if (rc == 0)
                    rc = VCursorIdRange( cursor, 0, &first_row, &row_count );

                if (rc == 0)
                {
                    uint64_t offset;
                    for (offset = 0; offset < row_count; ++offset)
                    {
                        const void* cell;
                        uint32_t elem_count;
                        uint32_t id;
                        String name;
                        String download_ticket;
                        String encryption_key;

                        /* the first failing step ends the loop with its rc */
                        rc = VCursorSetRowId(cursor, first_row + offset);
                        if (rc != 0) break;
                        rc = VCursorOpenRow( cursor );
                        if (rc != 0) break;

                        rc = VCursorCellData( cursor, id_idx, NULL, &cell, NULL, NULL);
                        if (rc != 0) break;
                        id = *(uint32_t*)cell;

                        rc = VCursorCellData( cursor, name_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&name, (const char*)cell, elem_count, elem_count);

                        rc = VCursorCellData( cursor, dl_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&download_ticket, (const char*)cell, elem_count, elem_count);

                        rc = VCursorCellData( cursor, enc_idx, NULL, &cell, NULL, &elem_count);
                        if (rc != 0) break;
                        StringInit(&encryption_key, (const char*)cell, elem_count, elem_count);

                        rc = KeyRingDataInsertProject(data, id, &name, &download_ticket, &encryption_key);
                        if (rc != 0) break;

                        rc = VCursorCloseRow( cursor );
                        if (rc != 0) break;
                    }
                }
            }

            release_rc = VCursorRelease(cursor);
            if (rc == 0)
                rc = release_rc;
        }

        release_rc = VTableRelease(table);
        if (rc == 0)
            rc = release_rc;
    }

    return rc;
}
static rc_t report_deletes_db( const VDBManager *vdb_mgr, const char * path, uint32_t min_len ) { const VDatabase *db; rc_t rc = VDBManagerOpenDBRead( vdb_mgr, &db, NULL, "%s", path ); if ( rc != 0 ) { (void)PLOGERR( klogErr, ( klogInt, rc, "cannot open database $(db_name)", "db_name=%s", path ) ); } else { const VTable *tab; rc = VDatabaseOpenTableRead( db, &tab, "PRIMARY_ALIGNMENT" ); if ( rc != 0 ) { (void)LOGERR( klogErr, rc, "cannot open table PRIMARY_ALIGNMENT" ); } else { const VCursor *cur; rc = VTableCreateCursorRead( tab, &cur ); if ( rc != 0 ) { (void)LOGERR( klogErr, rc, "cannot open cursor on table PRIMARY_ALIGNMENT" ); } else { uint32_t cigar_idx; rc = VCursorAddColumn( cur, &cigar_idx, "CIGAR_SHORT" ); if ( rc != 0 ) { (void)LOGERR( klogErr, rc, "cannot add CIGAR_SHORT to cursor" ); } else { rc = VCursorOpen( cur ); if ( rc != 0 ) { (void)LOGERR( klogErr, rc, "cannot open cursor" ); } else { int64_t first; uint64_t count; rc = VCursorIdRange ( cur, cigar_idx, &first, &count ); if ( rc != 0 ) { (void)LOGERR( klogErr, rc, "cannot detect row-range" ); } else { rc = cigar_loop( cur, cigar_idx, first, count, min_len ); } } } VCursorRelease( cur ); } VTableRelease( tab ); } VDatabaseRelease( db ); } return rc; }
/* VCursorOpen
 *  Opens the cursor: obtains the library set from the table's linker,
 *  runs VCursorOpenRead, adds triggers for a cursor that is not
 *  read-only (unless triggers are suspended), and then positions the
 *  cursor's row id from the id range of column 0.
 *
 *  "cself" [ IN ] - cursor to open; a NULL value yields
 *  RC ( rcVDB, rcCursor, rcOpening, rcSelf, rcNull )
 *
 *  Initial row id when the range is non-empty:
 *    read-only cursor     - first row of the range
 *    non-read-only cursor - first + count ( append position )
 *
 *  An rcEmpty state from VCursorIdRange is turned into success when the
 *  cursor is not read-only, or when the rc object is rcRange, the
 *  cursor permits adding columns and its row vector is empty.
 *
 *  If a step after a successful VCursorOpenRead fails, the cursor state
 *  is set to vcFailed.
 */
LIB_EXPORT rc_t CC VCursorOpen ( const VCursor *cself )
{
    VCursor *self = ( VCursor* ) cself;
    VLinker *ld;
    KDlset *libs;
    rc_t rc;

    if ( self == NULL )
        return RC ( rcVDB, rcCursor, rcOpening, rcSelf, rcNull );

    ld = self -> tbl -> linker;

    rc = VLinkerOpen ( ld, & libs );
    if ( rc != 0 )
        return rc;

    rc = VCursorOpenRead ( self, libs );
    if ( rc == 0 )
    {
        if ( ! self -> read_only )
        {
            /* resolution context handed to the trigger resolver */
            VProdResolve pr;
            pr . schema = self -> schema;
            pr . ld = ld;
            pr . libs = libs;
            pr . stbl = self -> stbl;
            pr . curs = self;
            pr . cache = & self -> prod;
            pr . owned = & self -> owned;
            pr . chain = chainEncoding;
            pr . blobbing = false;
            pr . ignore_column_errors = false;
            pr . discover_writable_columns = false;

            if ( ! self -> suspend_triggers )
                rc = VProdResolveAddTriggers ( & pr, self -> stbl );
        }

        if ( rc == 0 )
        {
            /* TBD - warn if any input columns are unreferenced by schema */
            int64_t first;
            uint64_t count;

            rc = VCursorIdRange ( self, 0, & first, & count );
            if ( rc == 0 )
            {
                if ( count != 0 )
                {
                    if ( self -> read_only )
                    {
                        /* start reading at the first row */
                        self -> start_id = self -> end_id = self -> row_id = first;
                    }
                    else
                    {
                        /* start writing just past the last row */
                        self -> start_id = self -> end_id = self -> row_id = first + count;
                    }
                }
            }
            else if ( GetRCState ( rc ) == rcEmpty &&
                      /* writable cursors are expected to be empty */
                      ( ! self -> read_only ||
                        /* permit empty open when run from sradb */
                        ( GetRCObject ( rc ) == rcRange &&
                          self -> permit_add_column &&
                          VectorLength ( & self -> row ) == 0 ) ) )
            {
                rc = 0;
            }
        }

        if ( rc != 0 )
            self -> state = vcFailed;
    }

    KDlsetRelease ( libs );

    return rc;
}