/**
 * Gateway for the "tokens" builtin: splits a scalar string into tokens.
 *
 * @param in         1 or 2 arguments. in[0] must be a scalar String (the text
 *                   to split). The optional in[1] must be a non-empty String
 *                   whose entries each hold at most one character (the
 *                   delimiters). Without in[1] the delimiters are ' ' and tab.
 * @param _iRetCount number of requested outputs; must be 1.
 * @param out        receives either an empty Double matrix (empty input string
 *                   or stringTokens() returned NULL) or a String built from the
 *                   strings returned by stringTokens().
 * @return types::Function::OK on success, types::Function::Error after
 *         reporting the problem through Scierror().
 */
types::Function::ReturnValue sci_tokens(types::typed_list &in, int _iRetCount, types::typed_list &out)
{
    if (in.size() > 2 || in.size() == 0)
    {
        Scierror(77, _("%s: Wrong number of input argument(s): %d to %d expected.\n"), "tokens", 1, 2);
        return types::Function::Error;
    }

    if (_iRetCount != 1)
    {
        Scierror(78, _("%s: Wrong number of output argument(s): %d expected.\n"), "tokens", 1);
        return types::Function::Error;
    }

    // first arg: the string to split
    if (in[0]->isString() == false)
    {
        Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "tokens", 1);
        return types::Function::Error;
    }

    types::String* pString = in[0]->getAs<types::String>();
    if (pString->isScalar() == false)
    {
        Scierror(999, _("%s: Wrong size for input argument #%d.\n"), "tokens", 1);
        return types::Function::Error;
    }

    // Nothing to split: the result is an empty matrix.
    if (wcslen(pString->get(0)) == 0)
    {
        out.push_back(types::Double::Empty());
        return types::Function::OK;
    }

    // second arg: optional list of single-character delimiters
    types::String* pCharSample = NULL;
    int sizeSeps = 2; // default delimiters are ' ' and Tabulation
    if (in.size() == 2)
    {
        if (in[1]->isString() == false)
        {
            Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "tokens", 2);
            return types::Function::Error;
        }

        pCharSample = in[1]->getAs<types::String>();
        if (pCharSample->getSize() == 0)
        {
            Scierror(999, _("%s: Wrong size for input argument #%d.\n"), "tokens", 2);
            return types::Function::Error;
        }

        sizeSeps = pCharSample->getSize();
    }

    // One extra slot for the terminating L'\0'.
    wchar_t* seps = (wchar_t*)MALLOC((sizeSeps + 1) * sizeof(wchar_t));
    if (seps == NULL)
    {
        Scierror(999, _("%s: No more memory.\n"), "tokens");
        return types::Function::Error;
    }

    if (pCharSample != NULL)
    {
        for (int i = 0; i < sizeSeps; i++)
        {
            if (wcslen(pCharSample->get(i)) > 1)
            {
                Scierror(999, _("%s: Wrong type for input argument #%d: Char(s) expected.\n"), "tokens", 2);
                // Release the delimiter buffer: it was leaked on this path before.
                FREE(seps);
                return types::Function::Error;
            }

            // NOTE(review): an empty entry stores L'\0' here; if stringTokens()
            // reads seps as a NUL-terminated list, later delimiters are ignored.
            seps[i] = pCharSample->get(i)[0];
        }
    }
    else
    {
        seps[0] = L' ';
        seps[1] = L'\t';
    }
    seps[sizeSeps] = L'\0';

    // perform operation; dimsArray[0] is passed to stringTokens() as an output
    // slot and is then used as the row count of the result.
    int dimsArray[2] = {0, 1};
    int dims = 2;
    wchar_t** Output_Strings = stringTokens(pString->get(0), seps, &dimsArray[0]);
    FREE(seps);

    if (Output_Strings == NULL)
    {
        // return empty matrix
        out.push_back(types::Double::Empty());
        return types::Function::OK;
    }

    types::String* pOutString = new types::String(dims, dimsArray);
    pOutString->set(Output_Strings);

    // The token array is released right after set(), as in the original code.
    for (int i = 0; i < dimsArray[0]; i++)
    {
        FREE(Output_Strings[i]);
    }
    FREE(Output_Strings);

    out.push_back(pOutString);
    return types::Function::OK;
}
int Caller::loadEntries( const std::string path) { std::string nextLine; std::string key; std::string chr; std::string refBase; int readDepth; int pos; // Open the sample file std::ifstream inputFile( path.c_str()); if( !inputFile.is_open()) { perror( "Error opening input readcount file"); exit( 1); } // For each line in the sample file (which will correspond to a genomic location) while( std::getline( inputFile, nextLine)) { // Split the line into tokens separated by whitespace (for columns, since this is a tab delimited file) std::istringstream strStream( nextLine); std::istream_iterator<std::string> begin( strStream), end; std::vector<std::string> stringTokens( begin, end); // Get all main fields chr = stringTokens[0]; pos = atoi( stringTokens[1].c_str()); refBase = stringTokens[2]; refBase[0] = toupper( refBase[0]); readDepth = atoi( stringTokens[3].c_str()); // Generate the key, (chr:pos) key = stringTokens[0] + ":" + stringTokens[1]; if( key == "") { std::cout << "Empty key" << std::endl; } // Create the base ReadcountEntry object ReadcountEntry nextReadcountEntry( refBase, readDepth); // Get all subfields for each allele, the 5th column (stringTokens[4]) is garbage due to a bug with the bam-readcount program, ignore it for( int i = 5; i < stringTokens.size(); i++) { std::vector<std::string> nextSubTokens = Common::split( stringTokens[i], ":", true); // Create the Allele objects and add them to the current ReadcountEntry object std::string base = nextSubTokens[0]; int count = atoi( nextSubTokens[1].c_str()); double avgMappingQuality = atof( nextSubTokens[2].c_str()); double avgBaseQuality = atof( nextSubTokens[3].c_str()); double avgSEMappingQuality = atof( nextSubTokens[4].c_str()); int numPlusStrand = atoi( nextSubTokens[5].c_str()); int numMinusStrand = atoi( nextSubTokens[6].c_str()); double avgPosAsFraction = atof( nextSubTokens[7].c_str()); double avgNumMismatchesAsFraction = atof( nextSubTokens[8].c_str()); double avgSumMismatchQualities = atof( 
nextSubTokens[9].c_str()); int numQ2ContainingReads = atoi( nextSubTokens[10].c_str()); double avgDistanceToQ2StartInQ2Reads = atof( nextSubTokens[11].c_str()); double avgClippedLength = atof( nextSubTokens[12].c_str()); double avgDistanceToEffective3pEnd = atof( nextSubTokens[13].c_str()); bool variant = false; if( base != refBase) { variant = true; } double percentage; if( readDepth != 0) { percentage = ( double) count / ( double) readDepth * 100; } else { percentage = 0; } Allele nextAllele( base, count, avgMappingQuality, avgBaseQuality, avgSEMappingQuality, numPlusStrand, numMinusStrand, avgPosAsFraction, avgNumMismatchesAsFraction, avgSumMismatchQualities, numQ2ContainingReads, avgDistanceToQ2StartInQ2Reads, avgClippedLength, avgDistanceToEffective3pEnd, percentage, variant); nextReadcountEntry.addAllele( nextAllele); } // Now, the ReadcountEntry object is filled, so we can create the Sample object nextReadcountEntry.setMostFreqVariantAllele(); Sample nextSample( path, nextReadcountEntry); // Finally, add the Sample object to the Location object, // Check if the Location object with the current key exists in the hash table std::unordered_map<std::string, Location>::iterator iter = locationTable.find( key); if( iter == locationTable.end()) { // If it does not exist, create the Location object Location newLocation( chr, pos); // Add the new Sample to the Location object newLocation.addSample( nextSample); // Insert the new key-Location pair to the hash table std::pair<std::string, Location> newKeyPair( key, newLocation); locationTable.insert( newKeyPair); } else { bool sampleExists = false; std::vector<Sample> samples = ( iter->second).getSamples(); for( int j = 0; j < samples.size(); j++) { if( samples[j].getSampleName() == nextSample.getSampleName()) { sampleExists = true; } } if( !sampleExists) { ( iter->second).addSample( nextSample); } } } // Check if the file was read correctly if( inputFile.bad()) { perror( "Error reading input readcount file"); } // 
Close the input sample file inputFile.close(); }