Exemplo n.º 1
0
/**
 * Gateway for the "tokens" function: splits a scalar string into tokens.
 *
 * @param in         1 or 2 arguments. in[0] must be a scalar String (the text
 *                   to split). The optional in[1] must be a non-empty String
 *                   whose elements are each at most one character long; they
 *                   are used as delimiters. Without in[1] the delimiters are
 *                   space and tabulation.
 * @param _iRetCount number of requested outputs, must be 1.
 * @param out        receives either an empty Double matrix (empty input string
 *                   or stringTokens() returned NULL) or a String of dimensions
 *                   {token count, 1} holding the tokens.
 * @return types::Function::OK on success, types::Function::Error after
 *         Scierror() has been called for an invalid argument or a failed
 *         allocation.
 */
types::Function::ReturnValue sci_tokens(types::typed_list &in, int _iRetCount, types::typed_list &out)
{
    types::String* pOutString   = NULL;
    types::String* pString      = NULL;
    types::String* pCharSample  = NULL;
    wchar_t* seps               = NULL;
    int sizeSeps                = 0;

    if (in.size() > 2 || in.size() == 0)
    {
        Scierror(77, _("%s: Wrong number of input argument(s): %d to %d expected.\n"), "tokens", 1, 2);
        return types::Function::Error;
    }
    if (_iRetCount != 1)
    {
        Scierror(78, _("%s: Wrong number of output argument(s): %d expected.\n"), "tokens", 1);
        return types::Function::Error;
    }

    // first arg: the string to split
    if (in[0]->isString() == false)
    {
        Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "tokens", 1);
        return types::Function::Error;
    }
    pString = in[0]->getAs<types::String>();
    if (pString->isScalar() == false)
    {
        Scierror(999, _("%s: Wrong size for input argument #%d.\n"), "tokens", 1);
        return types::Function::Error;
    }
    if (wcslen(pString->get(0)) == 0)
    {
        // nothing to split: answer with an empty matrix
        out.push_back(types::Double::Empty());
        return types::Function::OK;
    }

    // second arg: validate it and work out how many delimiters are needed
    if (in.size() == 2)
    {
        if (in[1]->isString() == false)
        {
            Scierror(999, _("%s: Wrong type for input argument #%d: String expected.\n"), "tokens", 2);
            return types::Function::Error;
        }
        pCharSample = in[1]->getAs<types::String>();

        if (pCharSample->getSize() == 0)
        {
            Scierror(999, _("%s: Wrong size for input argument #%d.\n"), "tokens", 2);
            return types::Function::Error;
        }
        sizeSeps = pCharSample->getSize();
    }
    else // default delimiters are ' ' and Tabulation
    {
        sizeSeps = 2;
    }

    // one extra slot for the terminating L'\0'
    seps = (wchar_t*)MALLOC((sizeSeps + 1) * sizeof(wchar_t));
    if (seps == NULL)
    {
        Scierror(999, _("%s: No more memory.\n"), "tokens");
        return types::Function::Error;
    }

    if (pCharSample != NULL)
    {
        for (int i = 0; i < sizeSeps ; i++)
        {
            const wchar_t* pwstSep = pCharSample->get(i);
            if (wcslen(pwstSep) > 1)
            {
                Scierror(999, _("%s: Wrong type for input argument #%d: Char(s) expected.\n"), "tokens", 2);
                // the delimiter buffer is the only resource owned at this point
                FREE(seps);
                return types::Function::Error;
            }
            // NOTE(review): an empty element stores L'\0' here, which presumably
            // ends the delimiter list early for stringTokens() - confirm intent.
            seps[i] = pwstSep[0];
        }
    }
    else
    {
        seps[0] = L' ';
        seps[1] = L'\t';
    }
    seps[sizeSeps] = L'\0';

    // perform operation: dimsArray[0] is filled in by stringTokens()
    int dimsArray[2] = {0, 1};
    int dims = 2;

    wchar_t** Output_Strings = stringTokens(pString->get(0), seps, &dimsArray[0]);
    FREE(seps);
    if (Output_Strings == NULL)
    {
        //return empty matrix
        out.push_back(types::Double::Empty());
        return types::Function::OK;
    }

    pOutString = new types::String(dims, dimsArray);
    pOutString->set(Output_Strings);

    // the token array is released right after set(), as in the original code
    for (int i = 0 ; i < dimsArray[0] ; i++)
    {
        FREE(Output_Strings[i]);
    }
    FREE(Output_Strings);

    out.push_back(pOutString);
    return types::Function::OK;
}
Exemplo n.º 2
0
int Caller::loadEntries( const std::string path)
{
	std::string nextLine;
	std::string key;
	std::string chr;
	std::string refBase;
	int readDepth;
	int pos;

	// Open the sample file
	std::ifstream inputFile( path.c_str());
	if( !inputFile.is_open())
	{
		perror( "Error opening input readcount file");
		exit( 1);
	}

	// For each line in the sample file (which will correspond to a genomic location)
	while( std::getline( inputFile, nextLine))
	{
		// Split the line into tokens separated by whitespace (for columns, since this is a tab delimited file)
		std::istringstream strStream( nextLine);
		std::istream_iterator<std::string> begin( strStream), end;
		std::vector<std::string> stringTokens( begin, end);

		// Get all main fields
		chr = stringTokens[0];
		pos = atoi( stringTokens[1].c_str());
		refBase = stringTokens[2];
		refBase[0] = toupper( refBase[0]);
		readDepth = atoi( stringTokens[3].c_str());

		// Generate the key, (chr:pos)
		key = stringTokens[0] + ":" + stringTokens[1];
		if( key == "")
		{
			std::cout << "Empty key" << std::endl;
		}

		// Create the base ReadcountEntry object
		ReadcountEntry nextReadcountEntry( refBase, readDepth);

		// Get all subfields for each allele, the 5th column (stringTokens[4]) is garbage due to a bug with the bam-readcount program, ignore it
		for( int i = 5; i < stringTokens.size(); i++)
		{
			std::vector<std::string> nextSubTokens = Common::split( stringTokens[i], ":", true);

			// Create the Allele objects and add them to the current ReadcountEntry object
			std::string base = nextSubTokens[0];
			int count = atoi( nextSubTokens[1].c_str());
			double avgMappingQuality = atof( nextSubTokens[2].c_str());
			double avgBaseQuality = atof( nextSubTokens[3].c_str());
			double avgSEMappingQuality = atof( nextSubTokens[4].c_str());
			int numPlusStrand = atoi( nextSubTokens[5].c_str());
			int numMinusStrand = atoi( nextSubTokens[6].c_str());
			double avgPosAsFraction = atof( nextSubTokens[7].c_str());
			double avgNumMismatchesAsFraction = atof( nextSubTokens[8].c_str());
			double avgSumMismatchQualities = atof( nextSubTokens[9].c_str());
			int numQ2ContainingReads = atoi( nextSubTokens[10].c_str());
			double avgDistanceToQ2StartInQ2Reads = atof( nextSubTokens[11].c_str());
			double avgClippedLength = atof( nextSubTokens[12].c_str());
			double avgDistanceToEffective3pEnd = atof( nextSubTokens[13].c_str());

			bool variant = false;
			if( base != refBase)
			{
				variant = true;
			}

			double percentage;
			if( readDepth != 0)
			{
				percentage = ( double) count / ( double) readDepth * 100;
			}
			else
			{
				percentage = 0;
			}

			Allele nextAllele( base, count, avgMappingQuality, avgBaseQuality, avgSEMappingQuality, numPlusStrand, numMinusStrand,
							   avgPosAsFraction, avgNumMismatchesAsFraction, avgSumMismatchQualities, numQ2ContainingReads,
							   avgDistanceToQ2StartInQ2Reads, avgClippedLength, avgDistanceToEffective3pEnd, percentage, variant);

			nextReadcountEntry.addAllele( nextAllele);
		}

		// Now, the ReadcountEntry object is filled, so we can create the Sample object
		nextReadcountEntry.setMostFreqVariantAllele();
		Sample nextSample( path, nextReadcountEntry);

		// Finally, add the Sample object to the Location object,
		// Check if the Location object with the current key exists in the hash table
		std::unordered_map<std::string, Location>::iterator iter = locationTable.find( key);
		if( iter == locationTable.end())
		{
			// If it does not exist, create the Location object
			Location newLocation( chr, pos);

			// Add the new Sample to the Location object
			newLocation.addSample( nextSample);

			// Insert the new key-Location pair to the hash table
			std::pair<std::string, Location> newKeyPair( key, newLocation);
			locationTable.insert( newKeyPair);
		}
		else
		{
			bool sampleExists = false;
			std::vector<Sample> samples = ( iter->second).getSamples();
			for( int j = 0; j < samples.size(); j++)
			{
				if( samples[j].getSampleName() == nextSample.getSampleName())
				{
					sampleExists = true;
				}
			}

			if( !sampleExists)
			{
				( iter->second).addSample( nextSample);
			}
		}
	}

	// Check if the file was read correctly
	if( inputFile.bad())
	{
		perror( "Error reading input readcount file");
	}

	// Close the input sample file
	inputFile.close();
}