Exemple #1
0
// Search currentDir for the entry whose short name equals filename.
//
// On a match, *clusterNum receives the value getFirstCluster() reports for
// that entry, *indexPtr receives the entry's index within
// currentDir.dirEntries, and the function returns 1.
// When no entry matches, neither output is written and 0 is returned.
int findFilenameCluster(Directory currentDir, char *filename,
                        unsigned int *clusterNum, int *indexPtr) {
  // Scratch buffer that getShortName() fills for each entry
  // (13 bytes - presumably an 8.3 name plus terminator; confirm against
  // getShortName).
  char shortName[13];
  int entry = 0;

  while(entry < currentDir.size) {
    getShortName(currentDir.dirEntries[entry], shortName);

    // Not the entry we are looking for: move on to the next one.
    if(strcmp(shortName, filename) != 0) {
      ++entry;
      continue;
    }

    // First matching entry wins; report its cluster and position.
    *clusterNum = getFirstCluster(currentDir.dirEntries[entry]);
    *indexPtr = entry;
    return 1;
  }

  // Every entry was examined without a match.
  return 0;
}
Exemple #2
0
/**
 * Decompresses the reads of reference `rname` whose position POS satisfies
 * pos_x <= POS <= pos_y and writes one text line per read to `output`, in the
 * form "<rname>\t<POS>\t<sequence>\n".
 *
 * The compressed data is read from `input` in chunks of at most `MaxBytes`
 * bytes that are kept in the member buffer `buffRPS` (the chunk's file offset
 * is kept in `init_buffRPS`), so a buffer left by a previous call can be
 * reused when it still covers the requested cluster.
 *
 * @param input    stream holding the compressed data; it is repositioned with
 *                 seekg() and also handed to GetPressumeSeq().
 * @param output   stream receiving the decompressed lines.
 * @param rname    reference name; also emitted as first column of every line.
 * @param pos_x    lower bound (inclusive) of the requested interval.
 * @param pos_y    upper bound (inclusive) of the requested interval.
 * @param MaxBytes maximum number of bytes loaded into buffRPS at once.
 * @return number of reads appended to the output text (0 when `rname` is
 *         unknown or the interval starts past the reference's clusters).
 *
 * Side effects visible here: modifies the members buffRPS, init_buffRPS and
 * ClusterReads.
 */
cds_word CRPSPreMF::GetInterval(ifstream &input, ofstream &output, string rname, cds_word pos_x, cds_word pos_y, size_t MaxBytes){
	cds_word total_reads = 0;
	cds_word cluster_ref = (cds_word)-1, next = 0; 
	cds_word POS = 0,  rel_pos = 0, line_count = 0; 
	cds_word cluster_x, cluster_y, max_cluster, Ini_cluster;
	cds_word pos_buffRPS = 0, reads_in_cluster = 0, posPre = 0;
	cds_word last_length = 0, actualPos = 0;
	string datablock = "", preseq = "", seq = "";
	size_t pos_input = 0, sizeAux = 0;
	bool first = false; 
	// (cds_word)-1 is the "not found" sentinel; `next` is filled by the call and
	// is used below as an index into RNamesCluster.
	cluster_ref = getFirstCluster(rname, &next); //check whether rname is valid
	if(cluster_ref == (cds_word)-1)
		return total_reads;    //empty interval, rname not valid
	// Upper limit of the clusters that may be visited for this reference:
	// the end of the index when `next` is past the last RNamesCluster entry,
	// otherwise derived from RNamesCluster[next].
	if(next == RNamesCluster.size())
		max_cluster = ClusterIndex.size();
	else
		max_cluster =  RNamesCluster[next] - 1;
	// Each cluster covers IndexRate positions; Ini_cluster is the first
	// position covered by cluster_x (pos_x rounded down to a multiple of IndexRate).
	cluster_x = cluster_ref + pos_x / IndexRate;
	Ini_cluster =  IndexRate * (cds_word) (pos_x / IndexRate);
	cluster_y = cluster_ref + pos_y / IndexRate;
	if(cluster_x > max_cluster)
		return total_reads;    //empty interval, interval not valid
	// Clamp the last cluster so the loop never reaches max_cluster.
	if(cluster_y >= max_cluster)
		cluster_y = max_cluster - 1;
	while(cluster_x  <= cluster_y){
		// Positions inside a cluster are stored as increments (rel_pos) that are
		// accumulated onto the cluster's first position.
		POS = Ini_cluster;
		if(POS > pos_y)
			break;
		// (cds_word)-1 marks a cluster without data; it is skipped.
		if(ClusterIndex[cluster_x] != (cds_word)-1){
			// ClusterIndex holds a bit offset; /8 converts it to a byte offset
			// relative to init_seq.
			pos_input = init_seq + (ClusterIndex[cluster_x] / 8);
			// Reload the buffer when there is none, when the cluster starts before
			// the buffered window, or when it starts within kBytesPerWord * 350
			// bytes of the end of the MaxBytes window.
			if((buffRPS == NULL) || (pos_input < init_buffRPS) || ((init_buffRPS + MaxBytes) <= (pos_input + kBytesPerWord * 350))){
				input.seekg(pos_input);
				sizeAux = (end_seq - pos_input); //get block of data
				if(sizeAux > MaxBytes)
					sizeAux = MaxBytes; 
				if(buffRPS != NULL)
					delete [] buffRPS;
				buffRPS = (cds_word *)(LoadValue<char>(input, sizeAux));
				// Bit offset inside the first loaded byte (ClusterIndex modulo 8).
				pos_buffRPS = ClusterIndex[cluster_x] - 8 * (ClusterIndex[cluster_x] / 8);
				init_buffRPS = pos_input;
			}
			else{
				// Buffer is reused: recompute how many bytes it holds and the bit
				// offset of the cluster relative to the start of the buffer.
				sizeAux = MaxBytes;
				if((init_buffRPS + sizeAux) > end_seq)
					sizeAux = end_seq - init_buffRPS;
				pos_buffRPS = ClusterIndex[cluster_x] - 8 * (init_buffRPS - init_seq);
			}
			// Cluster header: two kWordSize-bit fields, the number of reads in the
			// cluster followed by the offset passed to GetPressumeSeq().
			ClusterReads = GetVarField(buffRPS, pos_buffRPS,  pos_buffRPS + kWordSize - 1);
			pos_buffRPS += kWordSize;
			posPre = GetVarField(buffRPS, pos_buffRPS,  pos_buffRPS + kWordSize - 1);
			pos_buffRPS += kWordSize;
			first = true;
			if(variableSize)
				last_length = ClusterOverlap[cluster_x];
			else
				last_length = 0;
			preseq = GetPressumeSeq(input, posPre, posPre + IndexRate + 350); //get presumed sequence of the block, assuming 350 maximum read length
			actualPos = 0;
			reads_in_cluster = ClusterReads;			
			while(reads_in_cluster > 0){				
				// DecompressLine advances pos_buffRPS and reports the read's position
				// increment through rel_pos (the other in/out arguments carry its
				// decoding state; see DecompressLine for their exact meaning).
				seq = DecompressLine(buffRPS, &pos_buffRPS, preseq, &actualPos, &first, &last_length, &rel_pos);
				POS += rel_pos;
				// Reads are visited in increasing POS, so the first one past pos_y
				// ends this cluster.
				if(POS > pos_y)
					break;
				if(POS >= pos_x){
					datablock += rname + "\t" +  NumtoString(POS) + "\t" + seq + "\n";
					line_count ++;
					total_reads ++;
					// Every IndexRate lines let check_tmp_string() handle the pending
					// text (presumably it flushes to `output` when large enough -
					// confirm); the counter restarts only if it emptied datablock.
					if((line_count % IndexRate) == 0){
						check_tmp_string(&datablock, output);
						if(datablock == "")
							line_count = 0;
					}
				}
				reads_in_cluster --;
				// Fewer than kWordSize * 350 bits left in the buffer and the buffer
				// does not already end at end_seq: load the next chunk, starting at
				// the byte that contains the current bit position.
				if(((8 * sizeAux - pos_buffRPS) < (kWordSize * 350)) && ((init_buffRPS + sizeAux) != end_seq)){  //check if another block of data is necessary
					pos_input = init_buffRPS + pos_buffRPS / 8;
					sizeAux = (end_seq - pos_input); //get block of data
					if(sizeAux > MaxBytes)
						sizeAux = MaxBytes;              
					if(buffRPS != NULL)
						delete [] buffRPS;
					input.seekg(pos_input);
					buffRPS = (cds_word *)(LoadValue<char>(input, sizeAux));
					init_buffRPS = pos_input;
					// Keep only the bit offset inside the first byte of the new chunk.
					pos_buffRPS = pos_buffRPS - 8 * (pos_buffRPS / 8);
				}
			}
		}
		cluster_x ++;
		Ini_cluster += IndexRate;
	}	
	// Write whatever text is still pending.
	if(datablock.length() != 0)
		output << datablock;
	return total_reads;
}
Exemple #3
0
void CRPSPreMF::ComputeCSeq(ifstream &SamFile, ofstream &fp, int SamOrRps){
	string line, Ref, Seq, PSeq;
	ifstream fileTemp;
	vector<string> tokens;
	cds_word location = 0;
	size_t pos_input = 0, end_input = 0;
	cds_word posPre = 0,  length_block = 0, Pos = 0, totalPos = 0;
	cds_word cluster_ref = 0, last_cluster = (cds_word)-1, next = 0;
	bitsLength = (cds_word)(ceil(log2(sizeLine.size()))); //bits used to store the line lengths
	buffer_use = last_pos = farther_pos = 0;
	last_ref = "*";
	tmp_file_string = "";
	fileTemp.open(tmp_name.c_str()); //OPEN PRESUMED SEQUENCE
	pos_input = fileTemp.tellg();
	fileTemp.seekg(0, std::ifstream::end);
	end_input = fileTemp.tellg();
	fileTemp.seekg(pos_input);
	getline(SamFile, line);
	while (SamFile.good()){
		if(line[0] != '@'){
			Tokenize(line, tokens, "\t");
			if(SamOrRps){
				Pos = atoi(tokens[1].c_str());
				Ref = tokens[0];
				Seq = tokens[2];
			}
			else{
				Pos = atoi(tokens[3].c_str());      
				Ref = tokens[2];      
				Seq = tokens[9];
			}
			if((tmp_file_string.length() - posPre) < (farther_pos - last_pos + Seq.length())){
				pos_input += posPre;
				fileTemp.seekg(pos_input);
				length_block = (end_input - pos_input);
				if(length_block > MAXRAMBYTES)
					length_block = MAXRAMBYTES;
				tmp_file_string = "";
				tmp_file_string = LoadValue<char>(fileTemp, length_block);
				posPre = 0;
			}
			if(Ref.compare("*")){ //if ref is not '*'
				if(!last_ref.compare(Ref)){
					if(farther_pos  > Pos){
						posPre += (Pos - last_pos);
						totalPos += (Pos - last_pos);
					}
					else{
						posPre += farther_pos - last_pos; 
						totalPos += farther_pos - last_pos; 
					}
				}
				else{ //NEW REFERENCE
					posPre += farther_pos - last_pos;
					totalPos += farther_pos - last_pos;
					cluster_ref = getFirstCluster(Ref, &next);
				}
				length_block = cluster_ref + Pos / IndexRate; //compute the Index position
				if(length_block != last_cluster){ //add block info and pointer to the index
					ClusterReads = ClusterIndex[length_block];
					ClusterIndex[length_block] = location;
					last_cluster = length_block;
					SetVarField(buffer, buffer_use,  buffer_use + kWordSize - 1, ClusterReads); //Save number of read in the block
					buffer_use += kWordSize;
					SetVarField(buffer, buffer_use,  buffer_use + kWordSize - 1, totalPos); //Save pointer to pressume seq
					buffer_use += kWordSize;
					location += 2 * kWordSize;
				}
			}
			else //NOT REFERENCE READ
				cout << "Error: Not referenced read are ignored so far" << endl;
			PSeq = tmp_file_string.substr(posPre, Seq.size());
			location += CompressLine(Ref, Pos, Seq, PSeq, &buffer, &buffer_use);
			check_buffer(buffer, &buffer_use, fp);
		}
		tokens.clear();
		getline(SamFile,line);
	}	
	if(buffer_use != 0)
		SaveValue(fp, buffer,  (buffer_use + kWordSize - 1) / kWordSize);
	delete [] buffer;
	buffer = NULL;
}