// Trains a one-vs-one multiclass SVM on the given shared data and registers
// the "summary" output file name for the processed label.
//
// The evaluation fold count (3), training fold count (5), output filter level
// (2) and the default kernel parameter ranges are fixed inside this function.
// Only the summary file NAME is registered here; nothing in this function
// writes to that file.
//
// lookup - abundance vectors to train on. lookup[0] supplies the label used in
//          the output file name, so the vector must not be empty.
//
// Any std::exception is reported through m->errorOut() and the process is
// terminated with exit(1).
void ClassifySvmSharedCommand::trainSharedAndDesignData(vector<SharedRAbundVector*> lookup) {
    try {
        // convert the mothur data structures into the SVM input representation
        LabeledObservationVector labeledObservationVector;
        FeatureVector featureVector;
        readSharedRAbundVectors(lookup, designMap, labeledObservationVector, featureVector);
        SvmDataset svmDataset(labeledObservationVector, featureVector);

        int evaluationFoldCount = 3;
        int trainFoldCount = 5;
        OutputFilter outputFilter(2);
        OneVsOneMultiClassSvmTrainer t(svmDataset, evaluationFoldCount, trainFoldCount, *this, outputFilter);

        KernelParameterRangeMap kernelParameterRangeMap;
        getDefaultKernelParameterRangeMap(kernelParameterRangeMap);

        // train() returns a heap-allocated model owned by the caller
        // (processSharedAndDesignData deletes its result). The model is not
        // used here, so release it right away instead of leaking it.
        delete t.train(kernelParameterRangeMap);

        std::cout << "done training" << std::endl;

        // register the summary file for this label as an output of the command
        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        string filename = getOutputFileName("summary", variables);
        outputNames.push_back(filename);
        outputTypes["summary"].push_back(filename);

        m->mothurOutEndLine();
        std::cout << "leaving trainSharedAndDesignData" << std::endl;
    }
    catch (exception& e) {
        m->errorOut(e, "ClassifySvmSharedCommand", "trainSharedAndDesignData");
        exit(1);
    }
}
Beispiel #2
0
void MSBuildProvider::createFiltersFile(const BuildSetup &setup, const std::string &name) {
	// No filters => no need to create a filter file
	if (_filters.empty())
		return;

	// Sort all list alphabetically
	_filters.sort();
	_compileFiles.sort();
	_includeFiles.sort();
	_otherFiles.sort();
	_resourceFiles.sort();
	_asmFiles.sort();

	const std::string filtersFile = setup.outputDir + '/' + name + getProjectExtension() + ".filters";
	std::ofstream filters(filtersFile.c_str());
	if (!filters)
		error("Could not open \"" + filtersFile + "\" for writing");

	filters << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
	           "<Project ToolsVersion=\"" << (_version >= 12 ? _version : 4) << ".0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n";

	// Output the list of filters
	filters << "\t<ItemGroup>\n";
	for (std::list<std::string>::iterator filter = _filters.begin(); filter != _filters.end(); ++filter) {
		filters << "\t\t<Filter Include=\"" << *filter << "\">\n"
		           "\t\t\t<UniqueIdentifier>" << createUUID() << "</UniqueIdentifier>\n"
		           "\t\t</Filter>\n";
	}
	filters << "\t</ItemGroup>\n";

	// Output files
	outputFilter(filters, _compileFiles, "ClCompile");
	outputFilter(filters, _includeFiles, "ClInclude");
	outputFilter(filters, _otherFiles, "None");
	outputFilter(filters, _resourceFiles, "ResourceCompile");
	outputFilter(filters, _asmFiles, "CustomBuild");

	filters << "</Project>";
}
Beispiel #3
0
// Switches the column to a new mode and, when the stored data type has to
// change, converts the existing data into the new representation.
//
// Steps, in order:
//  1. return immediately if the requested mode is already the current one;
//  2. emit modeAboutToChange() on the owner;
//  3. depending on (current mode, new mode), pick a conversion filter, wrap the
//     old data in a temporary column and allocate an empty container of the
//     new type in d_data;
//  4. create and install the input/output filters that belong to the new mode;
//  5. if a conversion is needed, connect the temporary column to the
//     conversion filter and pass the filter's output to copy();
//  6. emit modeChanged() on the owner.
//
// mode   - the mode to switch to
// filter - optional conversion filter supplied by the caller; when it is null
//          a default filter for the specific conversion is chosen. A filter
//          passed in by the caller is never deleted by this function.
//
// The old data container is not deleted here (see the remark below). The
// previous d_input_filter/d_output_filter pointers are overwritten without
// being deleted in this function.
// NOTE(review): presumably their ownership is handled elsewhere - confirm.
void Column::Private::setColumnMode(SciDAVis::ColumnMode mode, AbstractFilter *filter)
{
    // nothing to do when the mode does not change
    if (mode == d_column_mode) return;

    void * old_data = d_data;
    // remark: the deletion of the old data will be done in the dtor of a command

    AbstractSimpleFilter *new_in_filter, *new_out_filter;
    bool filter_is_temporary; // it can also become outputFilter(), which we may not delete here
    Column* temp_col = 0; // stays 0 when no data conversion is required
    // a caller-supplied filter is not owned by this function
    if (filter)
        filter_is_temporary = false;

    emit d_owner->modeAboutToChange(d_owner);

    // determine the conversion filter and allocate the new data vector
    // (outer switch: current mode, inner switch: requested mode)
    switch(d_column_mode)
    {
    case SciDAVis::Numeric:
        // the current output filter is about to be replaced, so stop listening to it
        disconnect(static_cast<Double2StringFilter *>(d_output_filter), SIGNAL(formatChanged()),
                   d_owner, SLOT(notifyDisplayChange()));
        switch(mode)
        {
        case SciDAVis::Numeric:
            // not reached: identical modes already returned above
            break;
        case SciDAVis::Text:
            if (!filter) {
                // convert with the filter returned by outputFilter(); it must not be deleted here
                filter = outputFilter();
                filter_is_temporary = false;
            }
            temp_col = new Column("temp_col", *(static_cast< QVector<qreal>* >(old_data)), d_validity);
            d_data = new QStringList();
            d_data_type = SciDAVis::TypeQString;
            break;
        case SciDAVis::DateTime:
            if (!filter) {
                filter = new Double2DateTimeFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QVector<qreal>* >(old_data)), d_validity);
            d_data = new QList<QDateTime>();
            d_data_type = SciDAVis::TypeQDateTime;
            break;
        case SciDAVis::Month:
            if (!filter) {
                filter = new Double2MonthFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QVector<qreal>* >(old_data)), d_validity);
            d_data = new QList<QDateTime>();
            d_data_type = SciDAVis::TypeQDateTime;
            break;
        case SciDAVis::Day:
            if (!filter) {
                filter = new Double2DayOfWeekFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QVector<qreal>* >(old_data)), d_validity);
            d_data = new QList<QDateTime>();
            d_data_type = SciDAVis::TypeQDateTime;
            break;
        } // switch(mode)
        break;

    case SciDAVis::Text:
        // no disconnect here, unlike the Numeric and DateTime/Month/Day cases
        switch(mode)
        {
        case SciDAVis::Text:
            // not reached: identical modes already returned above
            break;
        case SciDAVis::Numeric:
            if (!filter) {
                filter = new String2DoubleFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QStringList* >(old_data)), d_validity);
            d_data = new QVector<double>();
            d_data_type = SciDAVis::TypeDouble;
            break;
        case SciDAVis::DateTime:
            if (!filter) {
                filter = new String2DateTimeFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QStringList* >(old_data)), d_validity);
            d_data = new QList<QDateTime>();
            d_data_type = SciDAVis::TypeQDateTime;
            break;
        case SciDAVis::Month:
            if (!filter) {
                filter = new String2MonthFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QStringList* >(old_data)), d_validity);
            d_data = new QList<QDateTime>();
            d_data_type = SciDAVis::TypeQDateTime;
            break;
        case SciDAVis::Day:
            if (!filter) {
                filter = new String2DayOfWeekFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QStringList* >(old_data)), d_validity);
            d_data = new QList<QDateTime>();
            d_data_type = SciDAVis::TypeQDateTime;
            break;
        } // switch(mode)
        break;

    case SciDAVis::DateTime:
    case SciDAVis::Month:
    case SciDAVis::Day:
        // the current output filter is about to be replaced, so stop listening to it
        disconnect(static_cast<DateTime2StringFilter *>(d_output_filter), SIGNAL(formatChanged()),
                   d_owner, SLOT(notifyDisplayChange()));
        switch(mode)
        {
        case SciDAVis::DateTime:
            // no data conversion: temp_col stays 0, only the filters are exchanged
            break;
        case SciDAVis::Text:
            if (!filter) {
                // convert with the filter returned by outputFilter(); it must not be deleted here
                filter = outputFilter();
                filter_is_temporary = false;
            }
            temp_col = new Column("temp_col", *(static_cast< QList<QDateTime>* >(old_data)), d_validity);
            d_data = new QStringList();
            d_data_type = SciDAVis::TypeQString;
            break;
        case SciDAVis::Numeric:
            if (!filter) {
                // the default conversion filter depends on the current (source) mode
                if (d_column_mode == SciDAVis::Month)
                    filter = new Month2DoubleFilter();
                else if (d_column_mode == SciDAVis::Day)
                    filter = new DayOfWeek2DoubleFilter();
                else
                    filter = new DateTime2DoubleFilter();
                filter_is_temporary = true;
            }
            temp_col = new Column("temp_col", *(static_cast< QList<QDateTime>* >(old_data)), d_validity);
            d_data = new QVector<double>();
            d_data_type = SciDAVis::TypeDouble;
            break;
        case SciDAVis::Month:
        case SciDAVis::Day:
            // no data conversion: temp_col stays 0, only the filters are exchanged
            break;
        } // switch(mode)
        break;

    }

    // determine the new input and output filters
    switch(mode)
    {
    case SciDAVis::Numeric:
        new_in_filter = new String2DoubleFilter();
        new_out_filter = new Double2StringFilter();
#ifdef LEGACY_CODE_0_2_x  // TODO: in a later version this must use the new global setting method
        {
            // take digit count and numeric format from the stored application settings
#ifdef Q_OS_MAC // Mac
            QSettings settings(QSettings::IniFormat,QSettings::UserScope, "SciDAVis", "SciDAVis");
#else
            QSettings settings(QSettings::NativeFormat,QSettings::UserScope, "SciDAVis", "SciDAVis");
#endif
            settings.beginGroup("/General");
            static_cast<Double2StringFilter *>(new_out_filter)->setNumDigits(settings.value("/DecimalDigits", 14).toInt());
            static_cast<Double2StringFilter *>(new_out_filter)->setNumericFormat(settings.value("/DefaultNumericFormat", 'f').toChar().toAscii());
        }
#endif
        connect(static_cast<Double2StringFilter *>(new_out_filter), SIGNAL(formatChanged()),
                d_owner, SLOT(notifyDisplayChange()));
        break;
    case SciDAVis::Text:
        // text needs no conversion in either direction; no formatChanged() connection is made
        new_in_filter = new SimpleCopyThroughFilter();
        new_out_filter = new SimpleCopyThroughFilter();
        break;
    case SciDAVis::DateTime:
        new_in_filter = new String2DateTimeFilter();
        new_out_filter = new DateTime2StringFilter();
        connect(static_cast<DateTime2StringFilter *>(new_out_filter), SIGNAL(formatChanged()),
                d_owner, SLOT(notifyDisplayChange()));
        break;
    case SciDAVis::Month:
        new_in_filter = new String2MonthFilter();
        new_out_filter = new DateTime2StringFilter();
        // output format for Month columns
        static_cast<DateTime2StringFilter *>(new_out_filter)->setFormat("MMMM");
        connect(static_cast<DateTime2StringFilter *>(new_out_filter), SIGNAL(formatChanged()),
                d_owner, SLOT(notifyDisplayChange()));
        break;
    case SciDAVis::Day:
        new_in_filter = new String2DayOfWeekFilter();
        new_out_filter = new DateTime2StringFilter();
        // output format for Day columns
        static_cast<DateTime2StringFilter *>(new_out_filter)->setFormat("dddd");
        connect(static_cast<DateTime2StringFilter *>(new_out_filter), SIGNAL(formatChanged()),
                d_owner, SLOT(notifyDisplayChange()));
        break;
    } // switch(mode)

    d_column_mode = mode;

    // install the new filters: the input filter reads from the owner's
    // d_string_io, the output filter reads from the owner itself
    new_in_filter->setName("InputFilter");
    new_out_filter->setName("OutputFilter");
    d_input_filter = new_in_filter;
    d_output_filter = new_out_filter;
    d_input_filter->input(0, d_owner->d_string_io);
    d_output_filter->input(0, d_owner);

    if (temp_col) // if temp_col == 0, only the input/output filters need to be changed
    {
        // copy the filtered, i.e. converted, column
        filter->input(0, temp_col);
        copy(filter->output(0));
        delete temp_col;

        // only filters allocated above are deleted; caller-supplied filters
        // and the one obtained from outputFilter() are left alone
        if (filter_is_temporary) delete filter;
    }

    emit d_owner->modeChanged(d_owner);
}
// Runs the SVM analysis for one set of shared abundance vectors and writes the
// result to the "summary" output file for that label.
//
// Processing steps:
//  1. read the observations and features from lookup / designMap;
//  2. if stdthreshold > 0, drop features via applyStdThreshold() and report
//     the removed OTUs;
//  3. transform the observations: [0,1] scaling when transformName is
//     "zeroone", otherwise zero mean / unit variance;
//  4. mode "rfe": compute a ranked feature list and write it to the summary
//     file (the first 20 rows are also echoed to std::cout);
//     any other mode: train a multiclass SVM, write the performance summary
//     and one classification row per observation to the summary file.
//
// lookup - abundance vectors to analyse. lookup[0] supplies the label used in
//          the output file name, so the vector must not be empty.
//
// Any std::exception is reported through m->errorOut() and the process is
// terminated with exit(1).
void ClassifySvmSharedCommand::processSharedAndDesignData(vector<SharedRAbundVector*> lookup) {
    try {
        OutputFilter outputFilter(verbosity);

        LabeledObservationVector labeledObservationVector;
        FeatureVector featureVector;
        readSharedRAbundVectors(lookup, designMap, labeledObservationVector, featureVector);

        // optionally remove features with low standard deviation
        if ( stdthreshold > 0.0 ) {
            FeatureVector removedFeatureVector = applyStdThreshold(stdthreshold, labeledObservationVector, featureVector);
            if (removedFeatureVector.size() > 0) {
                std::cout << removedFeatureVector.size() << " OTUs were below the stdthreshold of " << stdthreshold << " and were removed" << std::endl;
                if ( outputFilter.debug() ) {
                    std::cout << "the following OTUs were below the standard deviation threshold of " << stdthreshold << std::endl;
                    for (FeatureVector::iterator i = removedFeatureVector.begin(); i != removedFeatureVector.end(); ++i) {
                        std::cout << "  " << i->getFeatureLabel() << std::endl;
                    }
                }
            }
        }

        // apply [0,1] standardization
        if ( transformName == "zeroone") {
            std::cout << "transforming data to lie within range [0,1]" << std::endl;
            transformZeroOne(labeledObservationVector);
        }
        else {
            // every other transform name falls back to zero mean / unit variance
            std::cout << "transforming data to have zero mean and unit variance" << std::endl;
            transformZeroMeanUnitVariance(labeledObservationVector);
        }

        SvmDataset svmDataset(labeledObservationVector, featureVector);

        OneVsOneMultiClassSvmTrainer trainer(svmDataset, evaluationFoldCount, trainingFoldCount, *this, outputFilter);

        if ( mode == "rfe" ) {
            // recursive feature elimination: only the linear kernel ranges are used
            SvmRfe svmRfe;
            ParameterRange& linearKernelConstantRange = kernelParameterRangeMap["linear"]["constant"];
            ParameterRange& linearKernelSmoCRange = kernelParameterRangeMap["linear"]["smoc"];
            RankedFeatureList rankedFeatureList = svmRfe.getOrderedFeatureList(svmDataset, trainer, linearKernelConstantRange, linearKernelSmoCRange);

            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
            variables["[distance]"] = lookup[0]->getLabel();
            string filename = getOutputFileName("summary", variables);
            outputNames.push_back(filename);
            outputTypes["summary"].push_back(filename);
            m->mothurOutEndLine();

            std::ofstream outputFile(filename.c_str());

            int n = 0;
            // The first entry's rank is used as the number of RFE rounds, so
            // the reported rank below is rfeRoundCount - getRank() + 1.
            // front() on an empty list is undefined, so guard it; with an
            // empty list the value is never used because the loop is skipped.
            int rfeRoundCount = rankedFeatureList.empty() ? 0 : rankedFeatureList.front().getRank();
            std::cout << "ordered features:" << std::endl;
            std::cout << setw(5)  << "index"
                      << setw(12) << "OTU"
                      << setw(5)  << "rank"
                      << std::endl;
            outputFile << setw(5)  << "index"
                       << setw(12) << "OTU"
                       << setw(5)  << "rank"
                       << std::endl;
            for (RankedFeatureList::iterator i = rankedFeatureList.begin(); i != rankedFeatureList.end(); ++i) {
                n++;
                int rank = rfeRoundCount - i->getRank() + 1;
                outputFile << setw(5)  << n
                           << setw(12) << i->getFeature().getFeatureLabel()
                           << setw(5)  << rank
                           << std::endl;
                // the file gets every feature, the console only the first 20
                if ( n <= 20 ) {
                    std::cout << setw(5) << n
                              << setw(12) << i->getFeature().getFeatureLabel()
                              << setw(5) << rank
                              << std::endl;
                }
            }
            outputFile.close();
        }
        else {
            // train() returns a heap-allocated model that is deleted below
            MultiClassSVM* mcsvm = trainer.train(kernelParameterRangeMap);

            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
            variables["[distance]"] = lookup[0]->getLabel();
            string filename = getOutputFileName("summary", variables);
            outputNames.push_back(filename);
            outputTypes["summary"].push_back(filename);
            m->mothurOutEndLine();

            std::ofstream outputFile(filename.c_str());

            printPerformanceSummary(mcsvm, std::cout);
            printPerformanceSummary(mcsvm, outputFile);

            // NOTE(review): the header names two columns while each row below
            // carries three values (dataset index, actual label, prediction).
            outputFile << "actual  predicted" << std::endl;
            for ( LabeledObservationVector::const_iterator i = labeledObservationVector.begin(); i != labeledObservationVector.end(); ++i ) {
                Label actualLabel = i->getLabel();
                outputFile << i->getDatasetIndex() << " " << actualLabel << " ";
                try {
                    Label predictedLabel = mcsvm->classify(*(i->getObservation()));
                    outputFile << predictedLabel << std::endl;
                }
                // The exception object is unused; leaving it unnamed avoids
                // shadowing the member 'm' used throughout this function.
                catch ( MultiClassSvmClassificationTie& ) {
                    outputFile << "tie" << std::endl;
                    // use the same accessors as the row output above
                    std::cout << "classification tie for observation " << i->getDatasetIndex() << " with label " << actualLabel << std::endl;
                }

            }
            outputFile.close();
            delete mcsvm;
        }

    }
    catch (exception& e) {
        m->errorOut(e, "ClassifySvmSharedCommand", "processSharedAndDesignData");
        exit(1);
    }
}