void CGapAnalysis::x_AddGapsFromBases( const CSeqMap_CI & seqmap_ci, TSeqIdConstRef bioseq_seq_id, TSeqPos iBioseqLength, TFlag fFlags) { const TSeqPos begin_pos = seqmap_ci.GetPosition(); // get location representing this segment's bases CRef<CSeq_loc> loc_of_bases( new CSeq_loc( *SerialClone(*bioseq_seq_id), begin_pos, (begin_pos + seqmap_ci.GetLength() - 1))); CSeqVector seq_vec( *loc_of_bases, *seqmap_ci.GetScope(), CBioseq_Handle::eCoding_Iupac); const char kGapChar = seq_vec.GetGapChar( CSeqVectorTypes::eCaseConversion_upper); // a simple "runs of unknown bases" algo size_t size_of_curr_gap = 0; size_t start_pos_of_curr_gap = kInvalidSeqPos; CSeqVector_CI seq_vec_ci = seq_vec.begin(); for( ; seq_vec_ci; ++seq_vec_ci ) { if( *seq_vec_ci == kGapChar ) { ++size_of_curr_gap; if( start_pos_of_curr_gap == kInvalidSeqPos ) { start_pos_of_curr_gap = (begin_pos + seq_vec_ci.GetPos()); } } else if( size_of_curr_gap > 0 ) { _ASSERT(start_pos_of_curr_gap != kInvalidSeqPos); AddGap( eGapType_UnknownBases, bioseq_seq_id, size_of_curr_gap, iBioseqLength, start_pos_of_curr_gap, (begin_pos + seq_vec_ci.GetPos()), fFlags); size_of_curr_gap = 0; start_pos_of_curr_gap = kInvalidSeqPos; } } if( size_of_curr_gap > 0 ) { _ASSERT(start_pos_of_curr_gap != kInvalidSeqPos); AddGap( eGapType_UnknownBases, bioseq_seq_id, size_of_curr_gap, iBioseqLength, start_pos_of_curr_gap, (begin_pos + seq_vec_ci.GetPos()), fFlags); } }
/**
 * Marks a whole part as missing.
 *
 * Adds a gap spanning the part's byte range and flags the part as
 * incomplete. An index that is not below m_iPartCount triggers wxFAIL
 * and changes nothing.
 *
 * @param part Index of the part to mark as missing.
 */
void CGapList::AddGap(uint16 part)
{
	if (part < m_iPartCount) {
		// Byte range of the part, both ends inclusive.
		uint64 firstByte = part * PARTSIZE;
		uint64 lastByte = firstByte + GetPartSize(part) - 1;

		AddGap(firstByte, lastByte);
		m_partsComplete[part] = incomplete;
	} else {
		wxFAIL;
	}
}
void CMetfile::FillGap(unsigned int start, unsigned int end) { int todelete = 0; unsigned long newStart, newEnd; GAP * nextGap; newStart = 0; newEnd = 0; GAP * currentGap = gaplist; while (currentGap) { nextGap = currentGap -> next; if (start <= currentGap -> start) { if (end > currentGap -> start) { if (end < currentGap -> end) { currentGap -> start = end; } else { todelete++; } } } else if(start < currentGap -> end) { if (end < currentGap -> end) { newStart = end; newEnd = currentGap -> end; } currentGap -> end = start; } if (todelete) { if (gaplist == currentGap) { gaplist = currentGap -> next; } free(currentGap); todelete = 0; } else if(newEnd) { AddGap(newStart, newEnd); newEnd = 0; } currentGap = nextGap; } }
/**
 * (Re)initialises the gap list for a file of the given size.
 *
 * Computes the part count and the size of the last part, discards all
 * previously stored gaps and per-part states, and then either marks the
 * whole file as one gap (isEmpty) or as having no gaps at all.
 *
 * @param fileSize Size of the file in bytes.
 * @param isEmpty  true: every part starts incomplete and one gap covers the
 *                 whole file; false: every part starts complete.
 */
void CGapList::Init(uint64 fileSize, bool isEmpty)
{
	m_filesize = fileSize;

	// Start by assuming a trailing, partially filled part.
	m_iPartCount = fileSize / PARTSIZE + 1;
	m_sizeLastPart = fileSize % PARTSIZE;
	if (fileSize && m_sizeLastPart == 0) {
		// Non-empty file of exactly n * PARTSIZE bytes: the last part is a
		// full one. (A zero fileSize only happens for the pre-init in the ctor.)
		m_iPartCount--;
		m_sizeLastPart = PARTSIZE;
	}

	m_gaplist.clear();
	m_partsComplete.clear();

	if (!isEmpty) {
		m_partsComplete.resize(m_iPartCount, complete);
		m_totalGapSize = 0;
	} else {
		m_partsComplete.resize(m_iPartCount, incomplete);
		AddGap(0, fileSize - 1);
		m_totalGapSize = fileSize;
	}

	m_totalGapSizeValid = true;
}
void CGapAnalysis::AddBioseqGaps( const CBioseq_Handle & bioseq_h, TAddFlag add_flags, TFlag fFlags, size_t max_resolve_count) { // get CSeq_id of CBioseq TSeqIdConstRef pSeqId = bioseq_h.GetSeqId(); const TSeqPos bioseq_len = bioseq_h.GetBioseqLength(); // fFlags control what we look at CSeqMap::TFlags seq_map_flags = 0; if( add_flags & fAddFlag_IncludeSeqGaps ) { seq_map_flags |= CSeqMap::fFindGap; } if( add_flags & fAddFlag_IncludeUnknownBases ) { seq_map_flags |= CSeqMap::fFindData; } TSeqPos end_of_last_segment = 0; // exclusive bool all_segments_and_in_order = true; SSeqMapSelector selector; selector.SetFlags(seq_map_flags).SetResolveCount(max_resolve_count); CSeqMap_CI seqmap_ci(bioseq_h, selector); for( ; seqmap_ci; ++seqmap_ci ) { if( seqmap_ci.GetPosition() != end_of_last_segment ) { all_segments_and_in_order = false; } end_of_last_segment = seqmap_ci.GetEndPosition(); CSeqMap::ESegmentType seg_type = seqmap_ci.GetType(); switch(seg_type) { case CSeqMap::eSeqGap: _ASSERT(add_flags & fAddFlag_IncludeSeqGaps); AddGap( eGapType_SeqGap, pSeqId, seqmap_ci.GetLength(), bioseq_len, seqmap_ci.GetPosition(), seqmap_ci.GetEndPosition(), fFlags); break; case CSeqMap::eSeqData: _ASSERT(add_flags & fAddFlag_IncludeUnknownBases); x_AddGapsFromBases( seqmap_ci, pSeqId, bioseq_len, fFlags); break; default: NCBI_USER_THROW_FMT( "This segment type is not supported at this time: " << static_cast<int>(seg_type) ); } } if( end_of_last_segment != bioseq_len ) { all_segments_and_in_order = false; } if( ! all_segments_and_in_order ) { ERR_POST( Warning << "Not all segments on bioseq '" << pSeqId->AsFastaString() << "' were in order " "or some positions appear to have been skipped. " "One possible reason is that there were far references for " "which no attempt was made to resolve due to max resolve count " "being reached."); } }
// задание списка изломов void key_SetGaps(void) { if (bKey == bKEY_ENTER) { if (enKeyboard == KBD_ENTER) { if (boGapsFlag == false) { BlockProgram2(wSET_GAPS_FLAG, 1); return; } else { enKeyboard = KBD_POSTINPUT1; // начинаем просмотр списка изломов ShowHi(szGaps); } } else if (enKeyboard == KBD_POSTINPUT2) { tiT.bDay = GetCharLo(0,1); if ((tiT.bDay > 0) && (tiT.bDay <= 31)) { enKeyboard = KBD_INPUT3; szLo[2] = '.'; } else Beep(); } else if (enKeyboard == KBD_POSTINPUT3) { tiT.bMonth = GetCharLo(3,4); tiT.bYear = 0; if ((tiT.bMonth == 0) || (tiT.bMonth > 12)) Beep(); else if (tiT.bDay > GetDaysInMonthM(tiT.bMonth)) { enKeyboard = KBD_INPUT2; LongBeep(); ShowLo(szMaskGap); } else { enKeyboard = KBD_INPUT4; tiT.bSecond = 0; // тип излома (режим работы) ShowGapName(tiT.bSecond); szLo[6] = '.'; } } else if (enKeyboard == KBD_INPUT4) { enKeyboard = KBD_POSTENTER; // переходим в режим ввода if (AddGap() == 1) // добавляем новый излома в список { if (++ibX == bGAPS) // если список заполнен { ibX = 0; // переходим на начало enKeyboard = KBD_SHOW; // переходим в режим просмотра } } else Beep(); // такой излом есть в списке } else if (enKeyboard == KBD_SHOW) // режим проcмотра { ibX++; // переходим на следующий излом if (ibX == GetGapSize()) // дошли до конца списка { if (GetGapSize() == bGAPS) ibX = 0; // если список заполнен - продолжаем просмотр с начала else enKeyboard = KBD_POSTENTER; // если в списке есть свободное место - переходим в режим ввода } } if ((enKeyboard == KBD_POSTINPUT1) || (enKeyboard == KBD_INPUT2)) { // начинаем или продолжаем просмотр списка изломов ibX = 0; // переходим на начало if (GetGapSize() == 0) enKeyboard = KBD_POSTENTER; // если список пустой - переходим в режим ввода else enKeyboard = KBD_SHOW; // если список не пустой - переходим в режим просмотра } if (enKeyboard == KBD_POSTENTER) // режим ввода { enKeyboard = KBD_INPUT2; // подготовка к вводу излома ShowLo(szMaskGap); } else if (enKeyboard == KBD_SHOW) // режим просмотра ShowGap(ibX); 
// просмотр текущего излома } else if (bKey < 10) { if ((enKeyboard == KBD_INPUT2) || (enKeyboard == KBD_POSTINPUT2)) { enKeyboard = KBD_POSTINPUT2; ShiftLo(0,1); } else if ((enKeyboard == KBD_INPUT3) || (enKeyboard == KBD_POSTINPUT3)) { enKeyboard = KBD_POSTINPUT3; ShiftLo(3,4); } } else if (bKey == bKEY_MINUS) { if (enKeyboard == KBD_SHOW) // в режиме просмотра { DelGap(); // удаляем излом из списка LongBeep(); if (GetGapSize() == 0) { enKeyboard = KBD_INPUT2; ShowHi(szGaps); ShowLo(szMaskGap); } else ShowGap(ibX); } else Beep(); } else if (bKey == bKEY_POINT) { if (enKeyboard == KBD_POSTINPUT2) { tiT.bDay = GetCharLo(0,1); if ((tiT.bDay > 0) && (tiT.bDay <= 31)) { enKeyboard = KBD_INPUT3; szLo[2] = '.'; } else Beep(); } else if (enKeyboard == KBD_INPUT4) { if (++tiT.bSecond >= 12) tiT.bSecond = 0; ShowGapName(tiT.bSecond); szLo[6] = '.'; } else Beep(); } else Beep(); }
void CMetfile::Read() { ClearGaplist(); unsigned char * buffer = new unsigned char[512]; long length = 0; length = mf_metfile -> Length(); tagcount = 0; if (1 == mf_metfile -> Read( & version, 1)) { if (debug) { printf("Version=0x%02x: Length=%lu\n", version, length); } isnewstyle = false; switch (version) { case 0xe0: case 0xe1: isnewstyle = (version == 0xe1); parts = 0; if (!isnewstyle) { if (debug) { printf("...is not newstyle\n"); } mf_metfile -> Seek(24); if (4 == mf_metfile -> Read(buffer, 4)) { if ((buffer[0] == 0) && (buffer[1] == 0) && (buffer[2] == 2) && (buffer[3] == 1)) { isnewstyle = true; } mf_metfile -> Seek(1); if (4 == mf_metfile -> Read( & date, 4)) { if (16 == mf_metfile -> Read(checkid, 16)) { for (int i = 0 ; i < 16 ; i++) { sprintf(mf_filehash + i * 2, "%02x", checkid[i]); } if (2 == mf_metfile -> Read( & parts, 2)) { if (parts_hashtable) { free(parts_hashtable); parts_hashtable = NULL; } if (parts) { int blocklength, position = 0, bytes = 0; blocklength = 16 * parts; parts_hashtable = (unsigned char *) malloc(blocklength); while (position < blocklength) { bytes = mf_metfile -> Read(parts_hashtable + position, 16); if (!bytes) { blocklength = - blocklength; } else { position += bytes; } } if (blocklength <= 0) { blocklength = - blocklength; if (debug) { printf("\nerror: %d(%d)%d ", blocklength, position, bytes); } } } } else { tagcount = - 4; } } else { tagcount = - 3; } } else { tagcount = - 2; } } else { tagcount = - 1; } } else { if (debug) { printf("...is newstyle..."); } if (4 == mf_metfile -> Read(buffer, 4)) { if (debug) { printf("temp-ok %02x,%02x,%02x,%02x\n", buffer[0], buffer[1], buffer[2], buffer[3]); } if (! 
(buffer[0] | buffer[1] | buffer[2] | buffer[3])) { if (16 == mf_metfile -> Read(checkid, 16)) { if (2 == mf_metfile -> Read( & parts, 2)) { if (parts_hashtable) { free(parts_hashtable); parts_hashtable = NULL; } if (parts) { parts_hashtable = (unsigned char *) malloc(16 * parts); int i = 0; while (i < parts) { if (16 == mf_metfile -> Read( & hash_current, 16)) { memcpy(parts_hashtable + i * 16, & hash_current, 16); i++; } else { i = parts; tagcount = - 13; } } } } else { parts = 0; tagcount = - 12; } } else { tagcount = - 11; } } else { mf_metfile -> Seek(2); if (4 == mf_metfile -> Read( & date, 4)) { if (16 == mf_metfile -> Read(checkid, 16)) { for (int i = 0 ; i < 16 ; i++) { sprintf(mf_filehash + i * 2, "%02x", checkid[i]); } } else { tagcount = - 23; } } else { tagcount = - 22; } } } else { if (debug) { printf("temp-error(-21)\n"); } tagcount = - 21; } } if (!tagcount) { if (4 == mf_metfile -> Read( & tagcount, 4)) { int gapindex = 0, gapstatus = 0, gapstart = 0, gapend = 0, i = 0; while (i < tagcount) { if (debug) { printf("ctag(%dv%d): ", 1 + i, tagcount); } CMetfileTag * CTag = new CMetfileTag(mf_metfile); if (!CTag) { printf("error\n"); } if (debug) { printf(" T=%d L=%d SpT=0x%02x StL=%d ", CTag -> type, CTag -> length, CTag -> specialtagtype, CTag -> stringlength); } switch (CTag -> type) { case 2: case 3: switch (CTag -> specialtagtype) { case 0: switch (CTag -> name[0]) { case 0x09: gapstart = CTag -> value; gapstatus |= 1; gapindex = atoi((char *) CTag -> name + 1); break; case 0x0a: gapstatus |= 2; gapend = CTag -> value; if (gapindex != atoi((char *) CTag -> name + 1)) { printf("gaperror: startindex(%d) != endindex(%d)\n", gapindex, atoi((char *) CTag -> name + 1)); } else if(gapstatus == 3) { AddGap(gapstart, gapend); } else { printf("gaperror: gapstatus(%d)!=3\n", gapstatus); } gapstatus = 0; break; case 0xfe: //ed2k-last-seen-complete time_t timet0; timet0 = (time_t) CTag -> value; if (debug) { printf(" ct=%s ", ctime( & timet0)); } break; default: 
gapstatus = 0; break; } break; case 1: gapstatus = 0; //filename if (mf_filename == NULL) { mf_filename = (char *) malloc(strlen((char *) CTag -> string) + 1); strcpy(mf_filename, (char *) CTag -> string); } break; case 2: gapstatus = 0; //filesize mf_filesize = CTag -> value; break; case 0x12: gapstatus = 0; //partfile if (mf_partfile == NULL) { mf_partfile = (char *) malloc(strlen((char *) CTag -> string) + 1); strcpy(mf_partfile, (char *) CTag -> string); } break; default: gapstatus = 0; break; } if (debug) { printf("\n"); } break; default: gapstatus = 0; if (debug) { printf("\n"); } break; } delete CTag; i++; } } else { tagcount = - 31; } } if (debug) { printf("Date=%s", asctime(localtime((time_t *) & date))); printf(" Parts=%d tagcount=%d\n", parts, tagcount); } break; default: break; } } delete[] buffer; SortGaplist(); }