// Given a filename, find it in the current directory. // If it is not in the current directory, return 0. // Otherwise, set the file/dir's first cluster number. int findFilenameCluster(Directory currentDir, char *filename, unsigned int *clusterNum, int *indexPtr) { // Temp filename built from each directory entry. char tempString[13]; // Look through each directory entry of the current directory. for(int i = 0; i < currentDir.size; ++i) { getShortName(currentDir.dirEntries[i], tempString); // Match is found. if(strcmp(tempString, filename) == 0) { *clusterNum = getFirstCluster(currentDir.dirEntries[i]); *indexPtr = i; return 1; } } // Filename wasn't found. return 0; }
// Decodes the reads of reference `rname` whose position falls inside
// [pos_x, pos_y] (both ends inclusive) and emits them as text lines of the
// form "rname<TAB>position<TAB>sequence".
//
// input    : stream holding the compressed read blocks; it is repositioned
//            with seekg whenever a new chunk has to be loaded.
// output   : stream receiving the decoded lines. Pending text is handed to
//            check_tmp_string every IndexRate lines and whatever is left is
//            written at the end.
// rname    : reference name, resolved to its first cluster via getFirstCluster.
// pos_x    : first position of the requested interval.
// pos_y    : last position of the requested interval.
// MaxBytes : upper bound on the number of bytes loaded into buffRPS at once.
//
// Returns the number of reads emitted. Returns 0 when getFirstCluster yields
// (cds_word)-1 for rname, or when the cluster containing pos_x lies beyond the
// cluster limit computed for that reference.
//
// Side effects: replaces the members buffRPS / init_buffRPS when a new chunk
// is loaded and overwrites ClusterReads for every cluster visited.
cds_word CRPSPreMF::GetInterval(ifstream &input, ofstream &output, string rname, cds_word pos_x, cds_word pos_y, size_t MaxBytes){
    const cds_word kNoCluster = (cds_word)-1;   // marker compared against cluster lookups

    cds_word emitted = 0;
    cds_word next_ref = 0;

    // Resolve the reference name; the marker value means it is unknown.
    const cds_word ref_cluster = getFirstCluster(rname, &next_ref);
    if(ref_cluster == kNoCluster)
        return emitted;

    // Cluster bound for this reference.
    cds_word cluster_limit;
    if(next_ref != RNamesCluster.size())
        cluster_limit = RNamesCluster[next_ref] - 1;
    else
        cluster_limit = ClusterIndex.size();

    // First cluster touched by the interval; nothing to do if it is out of range.
    cds_word cluster = ref_cluster + pos_x / IndexRate;
    if(cluster > cluster_limit)
        return emitted;

    // Position at which the first cluster starts, and the last cluster to visit.
    cds_word cluster_base = IndexRate * (cds_word) (pos_x / IndexRate);
    cds_word last_cluster = ref_cluster + pos_y / IndexRate;
    if(last_cluster >= cluster_limit)
        last_cluster = cluster_limit - 1;

    // State shared with DecompressLine through pointers; kept at function
    // scope so values persist exactly as long as they did before.
    cds_word cur_pos = 0, delta = 0, pending_lines = 0;
    cds_word bit_cursor = 0, pre_offset = 0;
    cds_word prev_len = 0, pre_cursor = 0;
    string out_block, presumed;
    size_t file_off = 0, loaded_bytes = 0;
    bool first_read = false;

    for(; cluster <= last_cluster; cluster++, cluster_base += IndexRate){
        cur_pos = cluster_base;
        if(cur_pos > pos_y)
            break;

        // Clusters carrying the marker value have no stored block.
        if(ClusterIndex[cluster] == kNoCluster)
            continue;

        // ClusterIndex holds a bit offset; convert it to a byte offset in the file.
        file_off = init_seq + (ClusterIndex[cluster] / 8);

        // The loaded chunk can be reused only if it exists, starts at or before
        // this cluster, and still has room for kBytesPerWord * 350 bytes past it.
        const bool window_covers =
            (buffRPS != NULL) &&
            (file_off >= init_buffRPS) &&
            ((init_buffRPS + MaxBytes) > (file_off + kBytesPerWord * 350));

        if(window_covers){
            loaded_bytes = MaxBytes;
            if((init_buffRPS + loaded_bytes) > end_seq)
                loaded_bytes = end_seq - init_buffRPS;
            bit_cursor = ClusterIndex[cluster] - 8 * (init_buffRPS - init_seq);
        }
        else{
            // Load a fresh chunk that starts at this cluster.
            input.seekg(file_off);
            loaded_bytes = (end_seq - file_off);
            if(loaded_bytes > MaxBytes)
                loaded_bytes = MaxBytes;
            delete [] buffRPS;   // deleting a null pointer is a no-op
            buffRPS = (cds_word *)(LoadValue<char>(input, loaded_bytes));
            bit_cursor = ClusterIndex[cluster] - 8 * (ClusterIndex[cluster] / 8);
            init_buffRPS = file_off;
        }

        // Block header: two kWordSize-bit fields, read count then offset into
        // the presumed sequence.
        ClusterReads = GetVarField(buffRPS, bit_cursor, bit_cursor + kWordSize - 1);
        bit_cursor += kWordSize;
        pre_offset = GetVarField(buffRPS, bit_cursor, bit_cursor + kWordSize - 1);
        bit_cursor += kWordSize;

        first_read = true;
        prev_len = 0;
        if(variableSize)
            prev_len = ClusterOverlap[cluster];

        // Presumed sequence backing this block, assuming a maximum read length of 350.
        presumed = GetPressumeSeq(input, pre_offset, pre_offset + IndexRate + 350);
        pre_cursor = 0;

        for(cds_word remaining = ClusterReads; remaining > 0; --remaining){
            const string read_seq = DecompressLine(buffRPS, &bit_cursor, presumed, &pre_cursor, &first_read, &prev_len, &delta);
            cur_pos += delta;
            if(cur_pos > pos_y)
                break;

            if(cur_pos >= pos_x){
                out_block += rname;
                out_block += "\t";
                out_block += NumtoString(cur_pos);
                out_block += "\t";
                out_block += read_seq;
                out_block += "\n";
                ++pending_lines;
                ++emitted;
                if((pending_lines % IndexRate) == 0){
                    check_tmp_string(&out_block, output);
                    if(out_block.empty())
                        pending_lines = 0;
                }
            }

            // Fetch the next chunk when fewer than kWordSize * 350 bits remain
            // and the current chunk does not already reach end_seq.
            const bool near_end = (8 * loaded_bytes - bit_cursor) < (kWordSize * 350);
            if(near_end && ((init_buffRPS + loaded_bytes) != end_seq)){
                file_off = init_buffRPS + bit_cursor / 8;
                loaded_bytes = (end_seq - file_off);
                if(loaded_bytes > MaxBytes)
                    loaded_bytes = MaxBytes;
                delete [] buffRPS;
                input.seekg(file_off);
                buffRPS = (cds_word *)(LoadValue<char>(input, loaded_bytes));
                init_buffRPS = file_off;
                bit_cursor %= 8;   // keep only the bit offset inside the first byte
            }
        }
    }

    // Write whatever text is still pending.
    if(!out_block.empty())
        output << out_block;
    return emitted;
}
void CRPSPreMF::ComputeCSeq(ifstream &SamFile, ofstream &fp, int SamOrRps){ string line, Ref, Seq, PSeq; ifstream fileTemp; vector<string> tokens; cds_word location = 0; size_t pos_input = 0, end_input = 0; cds_word posPre = 0, length_block = 0, Pos = 0, totalPos = 0; cds_word cluster_ref = 0, last_cluster = (cds_word)-1, next = 0; bitsLength = (cds_word)(ceil(log2(sizeLine.size()))); //bits used to store the line lengths buffer_use = last_pos = farther_pos = 0; last_ref = "*"; tmp_file_string = ""; fileTemp.open(tmp_name.c_str()); //OPEN PRESUMED SEQUENCE pos_input = fileTemp.tellg(); fileTemp.seekg(0, std::ifstream::end); end_input = fileTemp.tellg(); fileTemp.seekg(pos_input); getline(SamFile, line); while (SamFile.good()){ if(line[0] != '@'){ Tokenize(line, tokens, "\t"); if(SamOrRps){ Pos = atoi(tokens[1].c_str()); Ref = tokens[0]; Seq = tokens[2]; } else{ Pos = atoi(tokens[3].c_str()); Ref = tokens[2]; Seq = tokens[9]; } if((tmp_file_string.length() - posPre) < (farther_pos - last_pos + Seq.length())){ pos_input += posPre; fileTemp.seekg(pos_input); length_block = (end_input - pos_input); if(length_block > MAXRAMBYTES) length_block = MAXRAMBYTES; tmp_file_string = ""; tmp_file_string = LoadValue<char>(fileTemp, length_block); posPre = 0; } if(Ref.compare("*")){ //if ref is not '*' if(!last_ref.compare(Ref)){ if(farther_pos > Pos){ posPre += (Pos - last_pos); totalPos += (Pos - last_pos); } else{ posPre += farther_pos - last_pos; totalPos += farther_pos - last_pos; } } else{ //NEW REFERENCE posPre += farther_pos - last_pos; totalPos += farther_pos - last_pos; cluster_ref = getFirstCluster(Ref, &next); } length_block = cluster_ref + Pos / IndexRate; //compute the Index position if(length_block != last_cluster){ //add block info and pointer to the index ClusterReads = ClusterIndex[length_block]; ClusterIndex[length_block] = location; last_cluster = length_block; SetVarField(buffer, buffer_use, buffer_use + kWordSize - 1, ClusterReads); //Save number of read in 
the block buffer_use += kWordSize; SetVarField(buffer, buffer_use, buffer_use + kWordSize - 1, totalPos); //Save pointer to pressume seq buffer_use += kWordSize; location += 2 * kWordSize; } } else //NOT REFERENCE READ cout << "Error: Not referenced read are ignored so far" << endl; PSeq = tmp_file_string.substr(posPre, Seq.size()); location += CompressLine(Ref, Pos, Seq, PSeq, &buffer, &buffer_use); check_buffer(buffer, &buffer_use, fp); } tokens.clear(); getline(SamFile,line); } if(buffer_use != 0) SaveValue(fp, buffer, (buffer_use + kWordSize - 1) / kWordSize); delete [] buffer; buffer = NULL; }