Пример #1
0
TraversalKind getTraversalKind (int argc, char* argv[])
{
    const char* STR_TRAVERSAL_MODE = "-traversal";

    TraversalKind result;

    // We create a command line parser.
    OptionsParser parser ("Traversal");
    parser.push_back (new OptionOneParam (STR_TRAVERSAL_MODE, "traversal mode ('unitig' or 'contig'",  true));

    // We retrieve the traversal kind.
    try
    {
        IProperties* props = parser.parse (argc, argv);

        parse (props->getStr(STR_TRAVERSAL_MODE), result);
    }
    catch (OptionFailure& e)
    {
        e.displayErrors (std::cout);
        exit (EXIT_FAILURE);
    }
    catch (Exception& e)
    {
        cout << e.getMessage() << endl;
        exit (EXIT_FAILURE);
    }

    return result;
}
Пример #2
0
	void execute ()
	{
		IProperties* args = getInput();

		u_int32_t seed1;
		u_int32_t seed2;
		u_int32_t dummy;
		u_int8_t kmerSize1;
		u_int8_t kmerSize2;
		string inputFilename1 = args->getStr(STR_SIMKA_URI_INPUT_1);
		string inputFilename2 = args->getStr(STR_SIMKA_URI_INPUT_2);
		SimkaMinCommons::getKmerInfos(inputFilename1, kmerSize1, dummy, seed1, dummy);
		SimkaMinCommons::getKmerInfos(inputFilename2, kmerSize2, dummy, seed2, dummy);
		//size_t kmerSize = getInput()->getInt (STR_KMER_SIZE);

		if(kmerSize1 != kmerSize2){
			cerr << "ERROR: can't compare both sketches because of different kmer sizes (" << kmerSize1 << " and " << kmerSize2 << ")" << endl;
			exit(1);
		}

		if(seed1 != seed2){
			cerr << "ERROR: can't compare both sketches because of different seeds (" << seed1 << " and " << seed2 << ")" << endl;
			exit(1);
		}

		//cout << seed1 << " " << seed2 << endl;
		SimkaMinDistanceAlgorithm* algo = new SimkaMinDistanceAlgorithm(args);
		algo->execute();
		delete algo;
	}
Пример #3
0
StringBuffer& CLogThread::serializeRequest(IEspContext& context,IInterface& logInfo, StringBuffer& returnStr)
{
    IRpcSerializable* rpcreq = dynamic_cast<IRpcSerializable*>(&logInfo);
    if(rpcreq==NULL)
        throw MakeStringException(500,"Issue serializing log information");

    // We want to serialize anything here for logging purpose: e.g., internal user fields: CompanyId
    // rpcreq->serialize(&context,returnStr, "LogData");
    // rpcreq->serialize(NULL,returnStr, "LogData");

    //BUG#26047
    //logInfo function parameter is instance of the incoming request object of the service.
    //instance objects of context and request are dependent upon the protocol binding.
    //Request parameters are relevent for HTTP protocol but are not relevent for protocolX.
    //Since request parameters pointer is not initilized in processing protocolX request it remains NULL
    //and causing this crash.
    IProperties* params = context.queryRequestParameters();
    if(params!=NULL)
    {
        bool notInternal = !params->hasProp("internal");
        if (notInternal)
            params->setProp("internal","1");
        rpcreq->serialize(&context,returnStr, "LogData");
        if (notInternal)
            params->removeProp("internal");
    }else{
        rpcreq->serialize(NULL,returnStr, "LogData");
    }

    return returnStr;
}
Пример #4
0
void CRpcMessage::add_attr(const char * path, const char * name, const char * value, IProperties & attrs)
{
    if ((path && *path) || (name && *name))
    {
        CSoapValue *par=m_params.get();
        if(path)
            par = par->get_value(path);
        if (name)
            par = par->get_value(name);
        if (par)
        {
            Owned<IPropertyIterator> piter = attrs.getIterator();       
            for (piter->first(); piter->isValid(); piter->next())
            {
                const char *propkey = piter->getPropKey();
                par->add_attribute(propkey, attrs.queryProp(propkey));
            }
        }
    }
    else
    {
        Owned<IPropertyIterator> piter = attrs.getIterator();
        for (piter->first(); piter->isValid(); piter->next())
        {
            const char *propkey = piter->getPropKey();
            add_attribute(propkey, attrs.queryProp(propkey));
        }
    }
}
Пример #5
0
	void parseArgs(){

		_options = getInput();

		//_sketchSize = _options->getInt(STR_SIMKA_SKETCH_SIZE);
	    _nbCores = _options->getInt(STR_NB_CORES);
		_inputFilename1 = _options->getStr(STR_SIMKA_URI_INPUT_1);
		_inputFilename2 = _options->getStr(STR_SIMKA_URI_INPUT_2);
		_outputDir = _options->getStr(STR_URI_OUTPUT);

		_start_i = _options->getInt("-start-i");
		_start_j = _options->getInt("-start-j");
		_n_i = _options->getInt("-n-i");
		_n_j = _options->getInt("-n-j");

		//_kmerSize = _options->getInt(STR_KMER_SIZE);


		if(!System::file().doesExist(_outputDir)){
			int ok = System::file().mkdir(_outputDir, -1);
			if(ok != 0){
				  std::cerr << "Error: can't create output directory (" << _outputDir << ")" << std::endl;
				  exit(1);
			}
		}
	}
Пример #6
0
bool CXmlScope::_getValue(const char* x, StringBuffer &ret)
{
    if (locals && locals->hasProp(x))
    {
        locals->getProp(x, ret);
        return true;
    }
    return root->getProp(x,ret);
};
Пример #7
0
int main (int argc, char* argv[])
{
    // We create a command line parser.
    OptionsParser parser ("SortingCount");
    parser.push_back (new OptionOneParam (STR_URI_INPUT, "sorting count input", true));

    try
    {
        // Shortcuts.
        typedef Kmer<>::Count Count;
        typedef Kmer<>::Type  Type;

        // We parse the user options.
        IProperties* options = parser.parse (argc, argv);

        // We load the object storing the couples [kmer,abundance]
        Storage* storage = StorageFactory(STORAGE_HDF5).load (options->getStr(STR_URI_INPUT));   LOCAL (storage);

        // We get the group inside the storage object
        Group& dskGroup = storage->getGroup("dsk");

        // We retrieve the partition holding the couples [kmer,abundance]
        Partition<Count>& solidKmers = dskGroup.getPartition<Count> ("solid");

        // Now, we read the couples in two ways, computing a checksum in each case.
        Type checksum1, checksum2;

        // CASE 1: we read the couples [kmer,abundance] with an iterator over the whole partition
        Iterator<Count>* it = solidKmers.iterator();  LOCAL (it);
        for (it->first(); !it->isDone(); it->next())   {   checksum1 = checksum1 + it->item().value;  }

        // CASE 2: we read the couples [kmer,abundance] with an iterator over each collection of the partition
        for (size_t i=0; i<solidKmers.size(); i++)
        {
            // We get the current collection inside the partition
            Collection<Count>& collection = solidKmers [i];

            Iterator<Count>* it = collection.iterator();  LOCAL (it);
            for (it->first(); !it->isDone(); it->next())   {   checksum2 = checksum2 + it->item().value;  }
        }

        // We check that we got the same checksum
        cout << "checksum1=" << checksum1 << endl;
        cout << "checksum2=" << checksum1 << endl;
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }

    return EXIT_SUCCESS;
}
Пример #8
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("BankFilter");
    parser.push_back (new OptionOneParam (STR_URI_INPUT,    "bank reference",               true));
    parser.push_back (new OptionOneParam (STR_URI_SEQ_IDS,  "file holding indexes of bank", true));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        /** We read the list of indexes. */
        set<size_t> indexes;
        FILE* file = fopen (options->getStr(STR_URI_SEQ_IDS).c_str(), "r");
        if (file != 0)
        {
            char buffer[128];
            while (fgets (buffer, sizeof(buffer), file))  {  indexes.insert (atoi(buffer));  }
            fclose (file);
        }

        cout << "found " << indexes.size() << " indexes" << endl;

        /** We open the output bank. */
        string outputBankUri = options->getStr(STR_URI_INPUT) + "_" + System::file().getBaseName (options->getStr(STR_URI_SEQ_IDS));
        IBank* outputBank = Bank::open (outputBankUri);
        LOCAL (outputBank);

        /** We loop the input bank. */
        IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT));
        LOCAL (inputBank);

        /** We use another iterator for filtering out some sequences. */
        FilterIterator<Sequence,FilterFunctor> itSeq (inputBank->iterator(), FilterFunctor(indexes));

        /** We loop the sequences. */
        for (itSeq.first(); !itSeq.isDone(); itSeq.next())
        {
            outputBank->insert (itSeq.item());
        }

        /** We flush the output bank. */
        outputBank->flush();
    }

    catch (OptionFailure& e)
    {
        return e.displayErrors (cout);
    }
    catch (Exception& e)
    {
        cerr << "EXCEPTION: " << e.getMessage() << endl;
    }
}
Пример #9
0
/* return false if the name is already defined. */
bool CXmlScope::declareValue(const char *name)
{
    if (locals && locals->hasProp(name))
        return false;
    
    if (!locals)
        locals = createProperties(true);
    locals->setProp(name, "");

    return true;
};
Пример #10
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("BankFilter");
    parser.push_back (new OptionOneParam (STR_URI_INPUT,     "bank input",   true));
    parser.push_back (new OptionOneParam (STR_FILTER_RATIO,  "skip a sequence if 'good letters number / seq.len > X'",   false, "0.8"));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        /** Shortcuts. */
        double percentThreshold = options->getDouble(STR_FILTER_RATIO);

        /** We open the input bank. */
        IBank* inBank = Bank::open (options->getStr(STR_URI_INPUT));
        LOCAL (inBank);

        /** We create the output inBank. */
        IBank* outBank = new BankFasta (options->getStr(STR_URI_INPUT) + "_filtered");
        LOCAL (outBank);

        /** We iterate the inBank. NOTE: WE USE A LAMBDA EXPRESSION HERE. */
        inBank->iterate ([&] (Sequence& s)
        {
            /** Shortcut. */
            char* data = s.getDataBuffer();

            size_t nbOK = 0;
            for (size_t i=0; i<s.getDataSize(); i++)
            {
                if (data[i]=='A' || data[i]=='C' || data[i]=='G' || data[i]=='T')  { nbOK++; }
            }

            if ((double)nbOK / (double)s.getDataSize() > percentThreshold)  {  outBank->insert (s);  }
        });

        /** We flush the output bank. */
        outBank->flush();
    }

    catch (OptionFailure& e)
    {
        return e.displayErrors (cout);
    }
    catch (Exception& e)
    {
        cerr << "EXCEPTION: " << e.getMessage() << endl;
    }
}
Пример #11
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("BankStats");
    parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input",   true));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        std::string filename = options->getStr(STR_URI_INPUT);

        //! [snippet16_bank]
        // We get an instance of IBank from the URI.
        IBank* bank = Bank::open (filename);

        //! [snippet16_seq]
        // We create an iterator on the bank
        Iterator<Sequence>* it = bank->iterator();

        // We iterate the sequences of the bank
        for (it->first(); !it->isDone(); it->next())
        {
            // We get a shortcut on the current sequence and its data
            Sequence& seq  = it->item();
            Data&     data = seq.getData();

            // We dump some information about the sequence.
            std::cout << "comment " << seq.getComment() << std::endl;

            // We dump each nucleotide. NOTE: the output depends on the data encoding
            for (size_t i=0; i<data.size(); i++)  {  std::cout << data[i];  }  std::cout << std::endl;
        }

        //! [snippet16_seq]
        // The bank and the iterator have been allocated on the heap, so we have to delete them
        delete it;
        delete bank;
        //! [snippet16_bank]
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }
}
Пример #12
0
bool CLogThread::queueLog(IEspContext & context,const char* serviceName, const char* request, const char* response)
{
    IProperties* pProperties = context.queryRequestParameters();

    StringBuffer UserID, UserRealm, UserReference, peer;
    if(pProperties != NULL && pProperties->hasProp("userid_"))
        UserID.appendf("%s",pProperties->queryProp("userid_"));
    else
        context.getUserID(UserID);

    if(pProperties != NULL && pProperties->hasProp("fqdn_"))
        UserRealm.appendf("%s",pProperties->queryProp("fqdn_"));
    else
        context.getRealm(UserRealm);

    Owned<IPropertyTree> pLogTreeInfo = createPTreeFromXMLString(request, ipt_none, ptr_none);
    IArrayOf<IEspLogInfo> LogArray;
    addLogInfo(LogArray, *pLogTreeInfo.get());

    if(pProperties != NULL && pProperties->hasProp("referencecode_"))
    {
        //lets manually add the reference number....
        IClientLogInfo& LogInfoTransaction =  addLogInfoElement(LogArray);
        LogInfoTransaction.setName("referencenumber");
        LogInfoTransaction.setValue(pProperties->queryProp("referencecode_"));
    }

    LOG_INFO _LogStruct(serviceName,-1,false);
    return queueLog(UserID.str(), UserRealm.str() , context.getPeer(peer).str(),_LogStruct, LogArray );
}
Пример #13
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("BankStats");
    parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input",   true));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        // We get information about the bank.
        u_int64_t nbSequences=0, dataSize=0, seqMaxSize=0, seqMinSize=~0;

        // We declare an input Bank and use it locally
        IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT));
        LOCAL (inputBank);

        ProgressIterator<Sequence> it (*inputBank, "iterate");
        for (it.first(); !it.isDone(); it.next())
        {
            Data& data = it.item().getData();

            nbSequences ++;
            if (data.size() > seqMaxSize)  { seqMaxSize = data.size(); }
            if (data.size() < seqMinSize)  { seqMinSize = data.size(); }
            dataSize += data.size ();
        }

        std::cout << "data size         : " << dataSize     << std::endl;
        std::cout << "sequence number   : " << nbSequences  << std::endl;
        std::cout << "sequence max size : " << seqMaxSize   << std::endl;
        std::cout << "sequence min size : " << seqMinSize   << std::endl;
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }
}
Пример #14
0
	void parseArgs(){

		_options = getInput();

		_inputFilename = _options->getStr(STR_URI_INPUT);

		if(!System::file().doesExist(_inputFilename)){
			std::cerr << "Error: input does not exist (" << _inputFilename << ")" << std::endl;
			exit(1);
		}
	}
Пример #15
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("BankDump");
    parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input",   true));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        /** We dump the bank hierarchy. */
        dump (Bank::open (options->getStr(STR_URI_INPUT)));
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }
}
Пример #16
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("bankgen");

    const char* OUTPUT_PREFIX = "-out";
    const char* SEQ_LEN       = "-seq-len";
    const char* READ_LEN      = "-read-len";
    const char* OVERLAP_LEN   = "-overlap-len";
    const char* COVERAGE      = "-coverage";

    parser.push_back (new OptionOneParam (OUTPUT_PREFIX,  "output prefix",               true));
    parser.push_back (new OptionOneParam (SEQ_LEN,        "sequence length",             false,  "1000000"));
    parser.push_back (new OptionOneParam (READ_LEN,       "read length",                 false,  "150" ));
    parser.push_back (new OptionOneParam (OVERLAP_LEN,    "overlap between two reads",   false,  "50" ));
    parser.push_back (new OptionOneParam (COVERAGE,       "coverage",                    false,  "3" ));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        /** We create the random sequence. */
        IBank* randomBank = new BankRandom (1, options->getInt(SEQ_LEN));
        LOCAL (randomBank);

        /** We create the reads bank. */
        IBank* readsBank = new BankSplitter (
            randomBank,
            options->getInt(READ_LEN),
            options->getInt(OVERLAP_LEN),
            options->getInt(COVERAGE)
        );
        LOCAL (readsBank);

        /** We save the random bank. */
        SaveAsFasta (randomBank, options->getStr(OUTPUT_PREFIX) + "_sequence.fa");

        /** We save the reads bank. */
        SaveAsFasta (readsBank, options->getStr(OUTPUT_PREFIX) + "_reads.fa");
    }
    catch (OptionFailure& e)
    {
        e.getParser().displayErrors (stdout);
        e.getParser().displayHelp   (stdout);
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Пример #17
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("GraphStats");
    parser.push_back (new OptionOneParam (STR_URI_GRAPH, "graph input",  true));
    parser.push_back (new OptionOneParam (STR_NB_CORES,  "nb cores",     false, "0"));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        // We load the graph
        Graph graph = Graph::load (options->getStr(STR_URI_GRAPH));

        // We set the number of cores to be used. Use all available cores if set to 0.
        size_t nbCores = options->getInt(STR_NB_CORES);

        // We get an iterator for branching nodes of the graph.
        // We use a progress iterator to get some progress feedback
        ProgressGraphIterator<BranchingNode,ProgressTimer>  itBranching (graph.iterator<BranchingNode>(), "statistics");

        // We define some kind of unique identifier for a couple (indegree,outdegree)
        typedef pair<size_t,size_t> InOut_t;

        // We want to gather some statistics during the iteration.
        // Note the use of ThreadObject: this object will be cloned N times (one object per thread) and each clone will
        // be reachable within the iteration block through ThreadObject::operator()
        ThreadObject <map <InOut_t, size_t> > topology;

        // We dispatch the iteration on several cores. Note the usage of lambda expression here.
        IDispatcher::Status status = Dispatcher(nbCores).iterate (itBranching, [&] (const BranchingNode& node)
        {
            // We retrieve the current instance of map <InOut_t,size_t> for the current running thread.
            map <InOut_t,size_t>& localTopology = topology();

            // We get branching nodes neighbors for the current branching node.
            Graph::Vector<BranchingEdge> successors   = graph.successors  <BranchingEdge> (node);
            Graph::Vector<BranchingEdge> predecessors = graph.predecessors<BranchingEdge> (node);

            // We increase the occurrences number for the current couple (in/out) neighbors
            localTopology [make_pair(predecessors.size(), successors.size())] ++;
        });

        // Now, the parallel processing is done. We want now to aggregate the information retrieved
        // in each thread in a single map.

        // We get each map<InOut_t,size_t> object filled in each thread, and we add its data into the "global" map.
        // The global map is reachable through the ThreadObject::operator*. The "topology.foreach" will loop over
        // all cloned object used in the threads.
        topology.foreach ([&] (const map <InOut_t, size_t>& t)
        {
            // We update the occurrence of the current couple (in/out)
            for_each (t.begin(), t.end(), [&] (const pair<InOut_t, size_t>& p) { (*topology)[p.first] += p.second;  });
        });

        // We sort the statistics by decreasing occurrence numbers. Since map have its own ordering, we need to put all
        // the data into a vector and sort it with our own sorting criteria.
        vector < pair<InOut_t,size_t> >  stats;
        for (auto it = topology->begin(); it != topology->end(); it++)  { stats.push_back (*it); }
        sort (stats.begin(), stats.end(), [=] (const pair<InOut_t,size_t>& a, const pair<InOut_t,size_t>& b) { return a.second > b.second; });

        printf ("\nThere are %d branching nodes with the following distribution: \n", itBranching.size());

        size_t sum=0;
        for (size_t i=0; i<stats.size(); i++)
        {
            sum += stats[i].second;

            printf ("    [in=%d out=%d]  nb=%7d  percent=%5.2f  distrib=%5.2f\n",
                stats[i].first.first,
                stats[i].first.second,
                stats[i].second,
                100.0*(float)stats[i].second / (float)itBranching.size(),
                100.0*(float)sum             / (float)itBranching.size()
            );
        }

        printf ("\nDone on %d cores in %.2f sec\n\n", status.nbCores, (float)status.time/1000.0);
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }

    return EXIT_SUCCESS;
}
Пример #18
0
int CFileSpraySoapBindingEx::onGetInstantQuery(IEspContext &context, CHttpRequest* request, CHttpResponse* response, const char *service, const char *method)
{
    bool permission = true;
    bool bDownloadFile = false;
    bool bProcess;
    StringBuffer sourceLogicalFile;
    StringBuffer methodbuf;
    StringBuffer submethod;
    StringBuffer xsltFileName(getCFD());
    xsltFileName.append("smc_xslt/");

    if (stricmp(method, "SprayFixedInput")==0)
    {
        if (!context.validateFeatureAccess(FILE_SPRAY_URL, SecAccess_Write, false))
            permission = false;

        bProcess = true;
        xsltFileName.append("fs_sprayForm.xslt");
        methodbuf.append("SprayFixed");
    }
    else if(stricmp(method, "SprayVariableInput")==0)
    {
        if (!context.validateFeatureAccess(FILE_SPRAY_URL, SecAccess_Write, false))
            permission = false;

        bProcess = true;
        xsltFileName.append("fs_sprayForm.xslt");
        methodbuf.append("SprayVariable");
        request->getParameter("submethod", submethod);
    }
    else if (stricmp(method, "DesprayInput")==0)
    {
        if (!context.validateFeatureAccess(FILE_DESPRAY_URL, SecAccess_Write, false))
            permission = false;

        request->getParameter("sourceLogicalName", sourceLogicalFile);
        xsltFileName.append("fs_desprayCopyForm.xslt");
        methodbuf.append("Despray");
        bProcess = true;
    }
    else if (stricmp(method, "CopyInput") == 0)
    {
        if (!context.validateFeatureAccess(FILE_SPRAY_URL, SecAccess_Write, false))
            permission = false;

        request->getParameter("sourceLogicalName", sourceLogicalFile);
        xsltFileName.append("fs_desprayCopyForm.xslt");
        methodbuf.append("Copy");
        bProcess = true;
    }
    else if (stricmp(method, "RenameInput") == 0)
    {
        if (!context.validateFeatureAccess(FILE_SPRAY_URL, SecAccess_Write, false))
            permission = false;

        request->getParameter("sourceLogicalName", sourceLogicalFile);
        xsltFileName.append("fs_renameForm.xslt");
        methodbuf.append("Rename");
        bProcess = true;
    }
    else if (stricmp(method, "DownloadFile") == 0)
    {
        if (!context.validateFeatureAccess(FILE_SPRAY_URL, SecAccess_Full, false))
            permission = false;

        downloadFile(context, request, response);
        bDownloadFile = true;
        bProcess = true;
    }
    else
        bProcess = false;

    if (bProcess)
    {
        if (bDownloadFile)
            return 0;

        StringBuffer xml;
        Owned<IProperties> params(createProperties());
        if (!permission)
        {
            params->setProp("@method", methodbuf.str());
            xml.append("<Environment><ErrorMessage>Permission denied.</ErrorMessage></Environment>");
        }
        else
        {
            if(submethod.length() > 0)
                params->setProp("@submethod", submethod.str());
            params->setProp("@method", methodbuf.str());

            if (*sourceLogicalFile.str())
            {
                params->setProp("@sourceLogicalName", sourceLogicalFile.str());

                Owned<IUserDescriptor> userdesc;
                StringBuffer username;
                context.getUserID(username);
                if(username.length() > 0)
                {
                    const char* passwd = context.queryPassword();
                    userdesc.setown(createUserDescriptor());
                    userdesc->set(username.str(), passwd);

                    try 
                    {
                        if (stricmp(method, "CopyInput") == 0)
                        {
                            Owned<IDistributedFile> df = queryDistributedFileDirectory().lookup(sourceLogicalFile.str(), userdesc.get());
                            if(!df)
                            {
                                throw MakeStringException(ECLWATCH_FILE_NOT_EXIST,"Could not find file %s.",sourceLogicalFile.str());
                            }
            
                            const char *kind = df->queryAttributes().queryProp("@kind");
                            if (kind && strcmp(kind,"key")==0)
                            {
                                params->setProp("@compressflag", 0);
                            }
                            else if(df->isCompressed())
                            {
                                params->setProp("@compressflag", 2);
                            }
                            else
                            {
                                params->setProp("@compressflag", 1);
                            }
                        }
                    }
                    catch (IException *E)
                    {
                        Owned<IXslProcessor> xslp = getXslProcessor();
                        if (!xslp)
                            throw E;

                        Owned<IMultiException> me = MakeMultiException();
                        me->append(*E);
                        response->handleExceptions(xslp, me, "FileSpray", method, StringBuffer(getCFD()).append("./smc_xslt/exceptions.xslt").str());
                        return 0;
                    }
                }
            }
            else
            {
                params->setProp("@compressflag", 1);
            }

            StringBuffer wuid;
            request->getParameter("wuid", wuid);
            Owned<IPropertyTree> pTree = createPTreeForXslt(method, wuid.str());
            toXML(pTree, xml, false);
        }
    
        IProperties* requestparams = request->queryParameters();
        if(requestparams && requestparams->hasProp("rawxml_"))
        {
            response->setContent(xml.str());
            response->setContentType(HTTP_TYPE_APPLICATION_XML);
        }
        else{
            StringBuffer htmlbuf;
            xsltTransform(xml.str(), xsltFileName.str(), params, htmlbuf);
            response->setContent(htmlbuf.str());
            response->setContentType(HTTP_TYPE_TEXT_HTML_UTF8);
        }

        response->send();
        return 0;
    }
    else
        return CFileSpraySoapBinding::onGetInstantQuery(context, request, response, service, method);
}
Пример #19
0
/*********************************************************************
** METHOD  :
** PURPOSE :
** INPUT   :
** OUTPUT  :
** RETURN  :
** REMARKS :
*********************************************************************/
IProperties* ToolComposite::run (int argc, char* argv[])
{
    vector<IProperties*> inputs;

    /** We first parse the options for all tools. */
    for (list<Tool*>::iterator it = _tools.begin(); it != _tools.end(); it++)
    {
#if 0
        /** We get the parameters from the current parser. */
        IProperties* input = (*it)->getParser()->parse (argc, argv);

        /** We add the input into the vector that gather the tools inputs. */
        inputs.push_back (input);
#else

        try
        {
            /** We parse the user parameters. */
            (*it)->getParser()->parse (argc, argv);


			IProperties* input =  (*it)->getParser()->getProperties() ;
            /** We add the input into the vector that gather the tools inputs. */
            inputs.push_back (input);
        }
        catch (OptionFailure& e)
        {
			IProperties* input =  (*it)->getParser()->getProperties() ;

			/** We add the input into the vector that gather the tools inputs. */
			inputs.push_back (input);
			
//            e.getParser().displayErrors (stdout);
//            e.getParser().displayHelp   (stdout);
//            return NULL;
        }
#endif
    }

    IProperties* output = 0;
    size_t idx = 0;
    for (list<Tool*>::iterator it = _tools.begin(); it != _tools.end(); it++, idx++)
    {
        /** We get the parameters from the current inputs entry. */
        IProperties* input = inputs[idx];

        /** We may have to add the output of the previous tool to the input of the current tool.
         *  WARNING! The output of the previous tool should have a bigger priority than the
         *  user parameters of the current tool.
         */
        IProperties* actualInput = 0;
        if (output != 0)
        {
            actualInput = new Properties();
            actualInput->add (1, output);   // output of the previous tool
            actualInput->add (1, input);    // input  of the previous tool
        }
        else
        {
            actualInput = input;
        }

        /** We run the tool and get a reference on its output. */
        output = (*it)->run (actualInput);

        /** We add the current tool info to the global properties. */
        _info->add (1, (*it)->getInfo());
    }

    /** We return the output properties. */
    return _output;
}
Пример #20
0
void CFileSpraySoapBindingEx::downloadFile(IEspContext &context, CHttpRequest* request, CHttpResponse* response)
{
    try
    {
        StringBuffer netAddressStr, osStr, pathStr, nameStr;
        request->getParameter("NetAddress", netAddressStr);
        request->getParameter("OS", osStr);
        request->getParameter("Path", pathStr);
        request->getParameter("Name", nameStr);
        
#if 0
        StringArray files;
        IProperties* params = request->queryParameters();
        Owned<IPropertyIterator> iter = params->getIterator();
        if (iter && iter->first())
        {
            while (iter->isValid())
            {
                const char *keyname=iter->getPropKey();
                if (!keyname || strncmp(keyname, "Names", 5))
                    continue;

                files.append(params->queryProp(iter->getPropKey()));
                iter->next();
            }
        }
#endif

        if (netAddressStr.length() < 1)
            throw MakeStringException(ECLWATCH_INVALID_INPUT, "Network address not specified.");

        if (pathStr.length() < 1)
            throw MakeStringException(ECLWATCH_INVALID_INPUT, "Path not specified.");

        if (nameStr.length() < 1)
            throw MakeStringException(ECLWATCH_INVALID_INPUT,"File name not specified.");

        char pathSep = '/';
        if ((osStr.length() > 1) && (atoi(osStr.str())== OS_WINDOWS))
        {
            pathSep = '\\';
        }

        pathStr.replace(pathSep=='\\'?'/':'\\', pathSep);
        if (*(pathStr.str() + pathStr.length() -1) != pathSep)
            pathStr.append( pathSep );

        StringBuffer fullName;
        fullName.appendf("%s%s", pathStr.str(), nameStr.str());

        StringBuffer headerStr("attachment;");
        headerStr.appendf("filename=%s", nameStr.str());

        RemoteFilename rfn;
        rfn.setRemotePath(fullName.str());
        SocketEndpoint ep(netAddressStr.str());
        rfn.setIp(ep);

        Owned<IFile> rFile = createIFile(rfn);
        if (!rFile)
            throw MakeStringException(ECLWATCH_CANNOT_OPEN_FILE,"Cannot open file %s.",fullName.str());

        OwnedIFileIO rIO = rFile->openShared(IFOread,IFSHfull);
        if (!rIO)
            throw MakeStringException(ECLWATCH_CANNOT_READ_FILE,"Cannot read file %s.",fullName.str());

        IFileIOStream* ioS = createIOStream(rIO);
        context.addCustomerHeader("Content-disposition", headerStr.str());
        response->setContent(ioS);  
        response->setContentType(HTTP_TYPE_OCTET_STREAM);
        response->send();
    }
    catch(IException* e)
    {   
        FORWARDEXCEPTION(context, e,  ECLWATCH_INTERNAL_ERROR);
    }
    return;
}
Пример #21
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("KmerTest");
    parser.push_back (new OptionOneParam (STR_URI_INPUT,      "bank input",     true));
    parser.push_back (new OptionOneParam (STR_KMER_SIZE,      "kmer size",      true));
    parser.push_back (new OptionOneParam (STR_MINIMIZER_SIZE, "minimizer size", true));
    parser.push_back (new OptionNoParam  (STR_VERBOSE,        "display kmers",  false));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        // We get the kmer and minimizer sizes.
        size_t kmerSize = options->getInt(STR_KMER_SIZE);
        size_t mmerSize = options->getInt(STR_MINIMIZER_SIZE);

        // We define a try/catch block in case some method fails (bad filename for instance)
        u_int64_t nbKmers       = 0;
        bool display = options->get(STR_VERBOSE) != 0;

        // We declare a Bank instance defined by a list of filenames
        IBank* bank = Bank::open (options->getStr(STR_URI_INPUT));
        LOCAL (bank);

        // We declare a kmer model and a minimizer model
        Model model (kmerSize, mmerSize);

        // We get a reference on the minimizer model, which will be useful for dumping
       const ModelMinimizer::Model& modelMinimizer = model.getMmersModel();

        Kmer<span>::Type checksum;
        size_t nbChanged = 0;
        size_t nbInvalid = 0;

        // We define an iterator that encapsulates the sequences iterator with progress feedback
        ProgressIterator<Sequence> iter (*bank, "iterate bank");

        // We loop over sequences.
        for (iter.first(); !iter.isDone(); iter.next())
        {
            // Shortcut
            Sequence& seq = iter.item();

//! [snippet1_iterate]
            // We iterate the kmers (and minimizers) of the current sequence.
            model.iterate (seq.getData(), [&] (const Model::Kmer& kmer, size_t idx)
            {
                nbKmers ++;
                if (kmer.hasChanged() == true)   { nbChanged++;  }
                if (kmer.isValid()    == false)  { nbInvalid++;  }
                checksum += kmer.minimizer().value();
            });
//! [snippet1_iterate]
        }

        cout << "nbKmers   : " << nbKmers   << endl;
        cout << "nbInvalid : " << nbInvalid << endl;
        cout << "nbChanged : " << nbChanged << endl;
        cout << "ratio     : " << (nbChanged > 0 ? (double)nbKmers / (double)nbChanged : 0) << endl;
        cout << "checksum  : " << checksum  << endl;
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }

    return EXIT_SUCCESS;
}
Пример #22
0
    void Debloom_check1 ()
    {
        size_t kmerSize = 11;
        size_t miniSize = 8;
        size_t nks      = 1;

        const char* seqs[] = {
            "CGCTACAGCAGCTAGTTCATCATTGTTTATCAATGATAAAATATAATAAGCTAAAAGGAAACTATAAATA"
            "ACCATGTATAATTATAAGTAGGTACCTATTTTTTTATTTTAAACTGAAATTCAATATTATATAGGCAAAG"
        } ;

        /** We configure parameters for a SortingCountAlgorithm object. */
        IProperties* params = SortingCountAlgorithm<>::getDefaultProperties();  LOCAL (params);
        params->setInt (STR_KMER_SIZE,          kmerSize);
        params->setInt (STR_MINIMIZER_SIZE,     miniSize);
        params->setInt (STR_MAX_MEMORY,         MAX_MEMORY);
        params->setInt (STR_KMER_ABUNDANCE_MIN, nks);
        params->setStr (STR_URI_OUTPUT,         "foo");

        /** We create a SortingCountAlgorithm object. */
        SortingCountAlgorithm<> sortingCount (new BankStrings (seqs, ARRAY_SIZE(seqs)), params);

        /** We launch DSK. */
        sortingCount.execute();

        CPPUNIT_ASSERT (sortingCount.getSolidCounts()->getNbItems() == (int64_t)(strlen(seqs[0]) - kmerSize + 1) );

        /** We get the storage instance. */
        Storage* storage = sortingCount.getStorage();
        LOCAL (storage);

        Partition<SortingCountAlgorithm<>::Count>& counts = storage->getGroup("dsk").getPartition<Kmer<>::Count> ("solid");

        /** We create a bloom instance. */
        float nbitsPerKmer = DebloomAlgorithm<>::getNbBitsPerKmer (kmerSize, DEBLOOM_ORIGINAL);
        BloomAlgorithm<> bloom (*storage, &counts, kmerSize, nbitsPerKmer, 0, BLOOM_BASIC);
        bloom.execute ();

        /** We create a debloom instance. */
        DebloomAlgorithm<> debloom (
            storage->getGroup("bloom"),
            storage->getGroup("debloom"),
            &counts, kmerSize, miniSize, 1000, 0, BLOOM_BASIC, DEBLOOM_ORIGINAL
        );

        /** We launch the debloom. */
        debloom.execute();

        /** The following values have been computed with the original minia. */
        u_int64_t values[] =
        {
            0xc0620,    0x288f40,   0x188f40,   0x2aaa29,   0x8000b,    0x200881,   0x288081,   0x820db,    0x52e23,    0x2888f,
            0xaaa8b,    0x28838d,   0x20000,    0xa93ab,    0x2c18d,    0x2ba89,    0x183600,   0xea00b,    0x1a4ea0,   0xf8585
        };
        set<Kmer<>::Type> okValues;
        
        for (unsigned int i = 0; i < ARRAY_SIZE(values); i++)
        {
            Kmer<>::Type val; val.setVal(values[i]);
            okValues.insert(val);
        }

        CPPUNIT_ASSERT (debloom.getCriticalKmers()->getNbItems() == ARRAY_SIZE(values));

        /** We iterate the cFP kmers. */
        set<Kmer<>::Type> checkValues;
        Iterator<Kmer<>::Type>* iter = debloom.getCriticalKmers()->iterator();
        LOCAL (iter);

        for (iter->first(); !iter->isDone(); iter->next())
        {
            set<Kmer<>::Type>::iterator lookup = okValues.find (iter->item());
            CPPUNIT_ASSERT (lookup != okValues.end());

            checkValues.insert (iter->item());
        }

        CPPUNIT_ASSERT (checkValues.size() == okValues.size());
    }
Пример #23
0
inline void XF(IPropertyTree &pt,const char *p,IProperties &from,const char *fp)
{
    const char * v = from.queryProp(fp);
    if (v&&*v)
        pt.setProp(p,v);
}
Пример #24
0
int main (int argc, char* argv[])
{
    const size_t SPAN = KMER_SPAN(1);

    /** Shortcuts. */
    typedef Kmer<SPAN>::Type  Type;
    typedef Kmer<SPAN>::Count Count;
    typedef Kmer<SPAN>::ModelCanonical ModelCanon;
    typedef Kmer<SPAN>::ModelMinimizer <ModelCanon> Model;

    size_t kmerSize = 33;
    size_t mmerSize = 11;

    /** We create a command line parser. */
    OptionsParser parser ("GraphStats");
    parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input",  true));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        string filename = options->getStr (STR_URI_INPUT);

        /** We create the solid kmers. */
        Graph graph = Graph::create ("-in %s -kmer-size %d  -bloom none -out toto.h5  -abundance-min 1", filename.c_str(), kmerSize);

        /** We get the information of the solid kmers from the HDF5 file. */
        Storage* storage = StorageFactory(STORAGE_HDF5).load ("toto.h5");   LOCAL (storage);
        Group& dskGroup = storage->getGroup("dsk");

        /** We get the solid kmers partition. */
        Partition<Count>& partition = dskGroup.getPartition<Count> ("solid");

        /** We create two kmers models. */
        Model  model   (kmerSize,   mmerSize);
        Model  modelK1 (kmerSize-1, mmerSize);

        // We declare an output Bank
        BankBinary outputBank (System::file().getBaseName(filename) + ".bin");

        /** We create a sequence with BINARY encoding. */
        Sequence seq (Data::ASCII);

        /** We get an iterator over the [kmer,abundance] of solid kmers. */
        Iterator<Count>* it = partition.iterator();   LOCAL (it);

        /** We iterate the solid kmers. */
        for (it->first(); !it->isDone(); it->next())
        {
            Type current = it->item().value;

            cout << "kmer=" << it->item().value << "  minimizer=" << model.getMinimizerValue(current)
                << "  abundance=" << it->item().abundance << endl;

            /** We interpret the kmer value as a Data object. */
            Data data (Data::BINARY);
            data.setRef ((char*) &current, model.getKmerSize());

            modelK1.iterate (data, [&] (const Model::Kmer& k, size_t idx)
            {
                /** Shortcut. */
                Type miniminizerCurrent = k.minimizer().value();

                cout << "-> " << k.value()
                     << " minimizer=" << miniminizerCurrent << " "
                     << modelK1.getMmersModel().toString (miniminizerCurrent)
                     << endl;

                string tmp = modelK1.getMmersModel().toString (miniminizerCurrent);

                /** We interpret the minimizer value as a Data object. */
                seq.getData().setRef ((char*)tmp.c_str(), modelK1.getMmersModel().getKmerSize());

                /** We insert the sequence into the binary bank. */
                outputBank.insert (seq);
            });
        }

        /** We flush the output bank. */
        outputBank.flush();

        /** We iterate the output bank. */
        outputBank.iterate ([&] (const Sequence& s)
        {
            /** We get the kmer corresponding to the current sequence. */
            ModelCanon::Kmer mini = modelK1.getMmersModel().codeSeed (s.getDataBuffer(), Data::BINARY);

            cout << "mini=" << mini.value() << "  " << modelK1.getMmersModel().toString (mini.value()) << endl;
        });
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }
}
Пример #25
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser;
    parser.push_back (new OptionOneParam (STR_URI_INPUT,  "graph file", true));

    IProperties* params = 0;

    try  {
        /** We parse the user options. */
        params = parser.parse (argc, argv);
    }
    catch (OptionFailure& e)
    {
        e.getParser().displayErrors (stdout);
        e.getParser().displayHelp   (stdout);
        return EXIT_FAILURE;
    }

    // We create the graph with the bank and other options
    Graph graph = Graph::load (params->getStr(STR_URI_INPUT));

    // We create a graph marker.
    GraphMarker<BranchingNode> marker (graph);

    // We create an object for Breadth First Search for the de Bruijn graph.
    BFS<BranchingNode> bfs (graph);

    // We want to compute the distribution of connected components of the branching nodes.
    //    - key is a connected component class (for a given number of branching nodes for this component)
    //    - value is the number of times this component class occurs in the branching sub graph
    map<size_t,Entry> distrib;

    // We get an iterator for all nodes of the graph. We use a progress iterator to get some progress feedback
    ProgressGraphIterator<BranchingNode,ProgressTimer>  itBranching (graph.iterator<BranchingNode>(), "statistics");

    // We want to know the number of connected components
    size_t nbConnectedComponents = 0;

    // We define some kind of unique identifier for a couple (indegree,outdegree)
    map <InOut_t, size_t> topology;

    size_t simplePathSizeMin = ~0;
    size_t simplePathSizeMax =  0;


    // We want time duration of the iteration
    TimeInfo ti;
    ti.start ("compute");

    // We loop the branching nodes
    for (itBranching.first(); !itBranching.isDone(); itBranching.next())
    {
        // We get branching nodes neighbors for the current branching node.
        Graph::Vector<BranchingEdge> successors   = graph.successors  <BranchingEdge> (*itBranching);
        Graph::Vector<BranchingEdge> predecessors = graph.predecessors<BranchingEdge> (*itBranching);

        // We increase the occurrences number for the current couple (in/out) neighbors
        topology [make_pair(predecessors.size(), successors.size())] ++;

        // We loop the in/out neighbors and update min/max simple path size
        for (size_t i=0; i<successors.size(); i++)
        {
            simplePathSizeMax = std::max (simplePathSizeMax, successors[i].distance);
            simplePathSizeMin = std::min (simplePathSizeMin, successors[i].distance);
        }
        for (size_t i=0; i<predecessors.size(); i++)
        {
            simplePathSizeMax = std::max (simplePathSizeMax, predecessors[i].distance);
            simplePathSizeMin = std::min (simplePathSizeMin, predecessors[i].distance);
        }

        // We skip already visited nodes.
        if (marker.isMarked (*itBranching))  {
            continue;
        }

        // We launch the breadth first search; we get as a result the set of branching nodes in this component
        const set<BranchingNode>& component = bfs.run (*itBranching);

        // We mark the nodes for this connected component
        marker.mark (component);

        // We update our distribution
        distrib[component.size()].nbOccurs += 1;

        // We update the number of connected components.
        nbConnectedComponents++;
    }

    ti.stop ("compute");

    // We compute the total number of branching nodes in all connected components.
    size_t sumOccurs = 0;
    size_t sumKmers = 0;
    for (map<size_t,Entry>::iterator it = distrib.begin(); it != distrib.end(); it++)
    {
        sumOccurs += it->first*it->second.nbOccurs;
        sumKmers  += it->second.nbKmers;
    }

    // We sort the statistics by decreasing occurrence numbers. Since map have its own ordering, we need to put all
    // the data into a vector and sort it with our own sorting criteria.
    vector < pair<InOut_t,size_t> >  stats;
    for (map <InOut_t, size_t>::iterator it = topology.begin(); it != topology.end(); it++)  {
        stats.push_back (*it);
    }

    sort (stats.begin(), stats.end(), CompareFct);

    // Note: it must be equal to the number of branching nodes of the graph
    assert (sumOccurs == itBranching.size());

    // We aggregate the computed information
    Properties props ("topology");

    props.add (1, "graph");
    props.add (2, "name",                    "%s", graph.getName().c_str());
    props.add (2, "db_input",                "%s", graph.getInfo().getStr("input").c_str());
    props.add (2, "db_nb_seq",               "%d", graph.getInfo().getInt("sequences_number"));
    props.add (2, "db_size",                 "%d", graph.getInfo().getInt("sequences_size"));
    props.add (2, "kmer_size",               "%d", graph.getInfo().getInt("kmer_size"));
    props.add (2, "kmer_nks",                "%d", graph.getInfo().getInt("nks"));
    props.add (2, "nb_nodes",                "%d", graph.getInfo().getInt("kmers_nb_solid"));
    props.add (2, "nb_branching_nodes",      "%d", graph.getInfo().getInt("nb_branching"));
    props.add (2, "percent_branching_nodes", "%.1f",
               graph.getInfo().getInt("kmers_nb_solid") > 0 ?
               100.0 * (float)graph.getInfo().getInt("nb_branching") / (float) graph.getInfo().getInt("kmers_nb_solid") : 0
              );

    props.add (1, "branching_nodes");

    props.add (2, "simple_path");
    props.add (3, "size_min", "%d", simplePathSizeMin);
    props.add (3, "size_max", "%d", simplePathSizeMax);

    props.add (2, "neighborhoods");
    for (size_t i=0; i<stats.size(); i++)
    {
        props.add (3, "neighborhood", "in=%d out=%d", stats[i].first.first, stats[i].first.second);
        props.add (4, "nb_bnodes",     "%d",    stats[i].second);
        props.add (4, "percentage",   "%5.2f", itBranching.size() > 0 ?
                   100.0*(float)stats[i].second / (float)itBranching.size() : 0
                  );
    }

    props.add (2, "connected_components");
    props.add (3, "nb_classes",    "%d", distrib.size());
    props.add (3, "nb_components", "%d", nbConnectedComponents);
    for (map<size_t,Entry>::iterator it = distrib.begin(); it!=distrib.end(); it++)
    {
        props.add (3, "component_class");
        props.add (4, "nb_occurs",    "%d", it->second.nbOccurs);
        props.add (4, "nb_bnodes",    "%d", it->first);
        props.add (4, "freq_bnodes",  "%f", sumOccurs > 0 ?
                   100.0*(float)(it->first*it->second.nbOccurs) / (float)sumOccurs : 0
                  );
    }
    props.add (1, ti.getProperties("time"));

    // We dump the results in a XML file in the current directory
    XmlDumpPropertiesVisitor v (graph.getName() + ".xml", false);
    props.accept (&v);

    return EXIT_SUCCESS;
}
Пример #26
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("BankSplitter");
    parser.push_back (new OptionOneParam (STR_URI_INPUT,      "bank reference",            true));
    parser.push_back (new OptionOneParam (STR_MAX_INPUT_SIZE, "average db size per split", true));
    parser.push_back (new OptionOneParam (STR_URI_OUTPUT_DIR, "output directory",          false, "."));
    parser.push_back (new OptionNoParam  (STR_OUTPUT_FASTQ,   "fastq output",              false));
    parser.push_back (new OptionNoParam  (STR_OUTPUT_GZ,      "gzip output",               false));

    // We define a try/catch block in case some method fails (bad filename for instance)
    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        /** Shortcuts. */
        u_int64_t maxDbSize = options->getInt(STR_MAX_INPUT_SIZE);

        // We declare an input Bank
        IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT));
        LOCAL (inputBank);

        // We get the basename of the input bank.
        string inputBasename = System::file().getBaseName (options->getStr(STR_URI_INPUT));

        /** We set the name of the output directory. */
        stringstream ss;  ss << inputBasename << "_S" << maxDbSize;
        string outputDirName = ss.str();

        /** We create the output directory. */
        string outputDir = options->getStr(STR_URI_OUTPUT_DIR) + "/" + outputDirName;
        System::file().mkdir (outputDir, S_IRWXU);

        // We create the album bank.
        BankAlbum album (outputDir + "/album.txt");

        /** We get estimations about the bank. */
        u_int64_t number, totalSize, maxSize;
        inputBank->estimate (number, totalSize, maxSize);

        u_int64_t estimationNbSeqToIterate = number;

        // We create an iterator over the input bank
        ProgressIterator<Sequence> itSeq (*inputBank, "split");

        // We loop over sequences to get the exact number of sequences.
          int64_t nbBanksOutput = -1;
        u_int64_t nbSequences   =  0;
        u_int64_t dbSize        = ~0;

        bool isFastq   = options->get(STR_OUTPUT_FASTQ) != 0;
        bool isGzipped = options->get(STR_OUTPUT_GZ)    != 0;

        IBank* currentBank = 0;

        for (itSeq.first(); !itSeq.isDone(); itSeq.next())
        {
            if (dbSize > maxDbSize)
            {
                if (currentBank != 0)  { currentBank->flush();  currentBank->finalize(); }

                nbBanksOutput ++;

                /** We build the uri of the current bank. */
                stringstream ss;  ss << inputBasename << "_" << nbBanksOutput << (isFastq ? ".fastq" : ".fasta");
                if (isGzipped) { ss << ".gz"; }

                /** We create a new bank and put it in the album. */
                currentBank = album.addBank (outputDir, ss.str(), isFastq, isGzipped);

                /** We reinit the db size counter. */
                dbSize = 0;
            }

            dbSize += itSeq->getDataSize();

            /** We insert the sequence into the current output bank. */
            currentBank->insert (*itSeq);
        }

        if (currentBank != 0)  { currentBank->flush(); }
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (cout);
    }
    catch (Exception& e)
    {
        cerr << "EXCEPTION: " << e.getMessage() << endl;
    }
}
Пример #27
0
 /** \copydoc Option::proceed */
 void proceed (const std::list<std::string>& args, IProperties& props)
 {
     props.add (0, getName(), args.front());
 }
Пример #28
0
int main (int argc, char* argv[])
{
    /** We create a command line parser. */
    OptionsParser parser ("GraphStats");
    parser.push_back (new OptionOneParam (STR_URI_GRAPH, "graph input",  true));

    try
    {
        /** We parse the user options. */
        IProperties* options = parser.parse (argc, argv);

        // We load the graph
        Graph graph = Graph::load (options->getStr(STR_URI_GRAPH));

        // We create a graph marker.
        GraphMarker marker (graph);

        // We create an object for Breadth First Search for the de Bruijn graph.
        BFS bfs (graph);

        // We want to compute the distribution of connected components of the branching nodes.
        //    - key is a connected component class (for a given number of branching nodes for this component)
        //    - value is the number of times this component class occurs in the branching sub graph
        map<size_t,size_t> distrib;

        // We get an iterator for all nodes of the graph. We use a progress iterator to get some progress feedback
        ProgressGraphIterator<BranchingNode,ProgressTimer>  itBranching (graph.iteratorBranching(), "statistics");

        // We want time duration of the iteration
        TimeInfo ti;
        ti.start ("compute");

        // We need to keep each connected component.
        list<set<BranchingNode> > components;

        // We loop the branching nodes
        for (itBranching.first(); !itBranching.isDone(); itBranching.next())
        {
            // We skip already visited nodes.
            if (marker.isMarked (*itBranching))  { continue; }

            // We launch the breadth first search; we get as a result the set of branching nodes in this component
            const set<BranchingNode>& component = bfs.run (*itBranching);

            // We memorize the component
            components.push_back (component);

            // We mark the nodes for this connected component
            marker.mark (component);

            // We update our distribution
            distrib[component.size()] ++;
        }

        ti.stop ("compute");

        // We compute the total number of branching nodes in all connected components.
        size_t sum = 0;   for (map<size_t,size_t>::iterator it = distrib.begin(); it != distrib.end(); it++)  {  sum += it->first*it->second; }

        // Note: it must be equal to the number of branching nodes of the graph
        assert (sum == itBranching.size());

        size_t idx1=0;
        size_t cc=0;
        // We check that each component has no intersection with all other components.
        // Note: this check may take a long time since we have N^2 intersections to compute.
        for (list<set<BranchingNode> >::iterator it1 = components.begin(); it1 != components.end(); it1++, idx1++)
        {
            size_t idx2=0;

            for (list<set<BranchingNode> >::iterator it2 = components.begin(); it2 != components.end(); it2++, idx2++)
            {
                if (it1 != it2)
                {
                    set<BranchingNode> inter;
                    set_intersection (it1->begin(),it1->end(),it2->begin(),it2->end(), std::inserter(inter,inter.begin()));
                    if (inter.size()!=0)  { printf ("ERROR, intersection should be empty...\n");  exit(EXIT_FAILURE); }
                }

                if (++cc % 50 == 0)
                {
                    cc = 0;
                    printf ("[check] %.1f  %.1f\r", 100.0*(float)idx1/(float)components.size(), 100.0*(float)idx2/(float)components.size());
                    fflush (stdout);
                }
            }
        }
        printf ("\n");

        // We aggregate the computed information
        Properties props ("connected_components");
        props.add (1, "graph_name",              "%s", graph.getName().c_str());
        props.add (1, "nb_branching_nodes",      "%d", sum);
        props.add (1, "nb_connected_components", "%d", distrib.size());
        for (map<size_t,size_t>::iterator it = distrib.begin(); it!=distrib.end(); it++)
        {
            props.add (2, "component");
            props.add (3, "nb_nodes",    "%d", it->first);
            props.add (3, "nb_occurs",   "%d", it->second);
            props.add (3, "freq_nodes",  "%f", 100.0*(float)(it->first*it->second) / (float)sum);
            props.add (3, "freq_occurs", "%f", 100.0*(float)it->second / (float)sum);
        }
        props.add (1, ti.getProperties("time"));

        // We dump the results in a XML file in the current directory
        XmlDumpPropertiesVisitor v (graph.getName() + ".xml", false);
        props.accept (&v);
    }
    catch (OptionFailure& e)
    {
        return e.displayErrors (std::cout);
    }
    catch (Exception& e)
    {
        std::cerr << "EXCEPTION: " << e.getMessage() << std::endl;
    }

    return EXIT_SUCCESS;
}