Exemplo n.º 1
0
QVariantMap MemoryUsageModel::details(int index) const
{
    QVariantMap result;
    const MemoryAllocationItem *ev = &m_data[index];

    if (ev->allocated >= -ev->deallocated)
        result.insert(QLatin1String("displayName"), tr("Memory Allocated"));
    else
        result.insert(QLatin1String("displayName"), tr("Memory Freed"));

    result.insert(tr("Total"), tr("%1 bytes").arg(ev->size));
    if (ev->allocations > 0) {
        result.insert(tr("Allocated"), tr("%1 bytes").arg(ev->allocated));
        result.insert(tr("Allocations"), QString::number(ev->allocations));
    }
    if (ev->deallocations > 0) {
        result.insert(tr("Deallocated"), tr("%1 bytes").arg(-ev->deallocated));
        result.insert(tr("Deallocations"), QString::number(ev->deallocations));
    }
    QString memoryTypeName;
    switch (selectionId(index)) {
    case HeapPage:  memoryTypeName = tr("Heap Allocation"); break;
    case LargeItem: memoryTypeName = tr("Large Item Allocation"); break;
    case SmallItem: memoryTypeName = tr("Heap Usage"); break;
    default: Q_UNREACHABLE();
    }
    result.insert(tr("Type"), memoryTypeName);

    result.insert(tr("Location"),
                  modelManager()->qmlModel()->eventTypes().at(ev->typeId).displayName());
    return result;
}
bool QmlProfilerTimelineModel::handlesTypeId(int typeIndex) const
{
    if (typeIndex < 0)
        return false;

    return accepted(modelManager()->qmlModel()->getEventTypes().at(typeIndex));
}
Exemplo n.º 3
0
void QmlProfilerRangeModel::finalize()
{
    if (!m_stack.isEmpty()) {
        qWarning() << "End times for some events are missing.";
        const qint64 endTime = modelManager()->traceEnd();
        do {
            int index = m_stack.pop();
            insertEnd(index, endTime - startTime(index));
        } while (!m_stack.isEmpty());
    }

    // compute range nesting
    computeNesting();

    // compute nestingLevel - nonexpanded
    computeNestingContracted();

    // compute nestingLevel - expanded
    computeExpandedLevels();

    if (supportsBindingLoops())
        findBindingLoops();

    QmlProfilerTimelineModel::finalize();
}
Exemplo n.º 4
0
QVariantMap DebugMessagesModel::details(int index) const
{
    const QmlEventType &type = modelManager()->qmlModel()->eventTypes()[m_data[index].typeId];

    QVariantMap result;
    result.insert(QLatin1String("displayName"), messageType(type.detailType()));
    result.insert(tr("Timestamp"), QmlProfilerDataModel::formatTime(startTime(index)));
    result.insert(tr("Message"), m_data[index].text);
    result.insert(tr("Location"), type.displayName());
    return result;
}
Exemplo n.º 5
0
void PixmapCacheModel::finalize()
{
    if (m_lastCacheSizeEvent != -1) {
        insertEnd(m_lastCacheSizeEvent, modelManager()->traceTime()->endTime() -
                  startTime(m_lastCacheSizeEvent));
    }

    resizeUnfinishedLoads();
    computeMaxCacheSize();
    flattenLoads();
    computeNesting();
}
Exemplo n.º 6
0
void QmlProfilerTraceFile::loadQzt(QIODevice *device)
{
    QDataStream stream(device);
    stream.setVersion(QDataStream::Qt_5_5);

    QByteArray magic;
    stream >> magic;
    if (magic != QByteArray("QMLPROFILER")) {
        fail(tr("Invalid magic: %1").arg(QLatin1String(magic)));
        return;
    }

    qint32 dataStreamVersion;
    stream >> dataStreamVersion;

    if (dataStreamVersion > QDataStream::Qt_DefaultCompiledVersion) {
        fail(tr("Unknown data stream version: %1").arg(dataStreamVersion));
        return;
    }
    stream.setVersion(dataStreamVersion);

    qint64 traceStart, traceEnd;
    stream >> traceStart >> traceEnd;
    setTraceStart(traceStart);
    setTraceEnd(traceEnd);

    QBuffer buffer;
    QDataStream bufferStream(&buffer);
    bufferStream.setVersion(dataStreamVersion);
    QByteArray data;
    setDeviceProgress(device);

    QmlProfilerModelManager *manager = modelManager();
    if (!isCanceled()) {
        stream >> data;
        buffer.setData(qUncompress(data));
        buffer.open(QIODevice::ReadOnly);
        quint32 numEventTypes;
        bufferStream >> numEventTypes;
        if (numEventTypes > quint32(std::numeric_limits<int>::max())) {
            fail(tr("Excessive number of event types: %1").arg(numEventTypes));
            return;
        }

        for (int typeId = 0; typeId < static_cast<int>(numEventTypes); ++typeId) {
            QmlEventType type;
            bufferStream >> type;
            manager->setEventType(typeId, std::move(type));
        }
        buffer.close();
        setDeviceProgress(device);
    }
Exemplo n.º 7
0
QVariantMap QmlProfilerRangeModel::details(int index) const
{
    QVariantMap result;
    int id = selectionId(index);

    result.insert(QStringLiteral("displayName"),
                  tr(QmlProfilerModelManager::featureName(mainFeature())));
    result.insert(tr("Duration"), Timeline::formatTime(duration(index)));

    const QmlEventType &type = modelManager()->eventType(id);
    result.insert(tr("Details"), type.data());
    result.insert(tr("Location"), type.displayName());
    return result;
}
QVariantMap QmlProfilerRangeModel::location(int index) const
{
    QVariantMap result;
    int id = selectionId(index);

    const QmlDebug::QmlEventLocation &location
            = modelManager()->qmlModel()->getEventTypes().at(id).location;

    result.insert(QStringLiteral("file"), location.filename);
    result.insert(QStringLiteral("line"), location.line);
    result.insert(QStringLiteral("column"), location.column);

    return result;
}
Exemplo n.º 9
0
void PixmapCacheModel::resizeUnfinishedLoads()
{
    // all the unfinished "load start" events continue till the end of the trace
    for (auto pixmap = m_pixmaps.begin(), pixmapsEnd = m_pixmaps.end();
         pixmap != pixmapsEnd; ++pixmap) {
        for (auto size = pixmap->sizes.begin(), sizesEnd = pixmap->sizes.end(); size != sizesEnd;
             ++size) {
            if (size->loadState == Loading) {
                insertEnd(size->started,
                          modelManager()->traceTime()->endTime() - startTime(size->started));
                size->loadState = Error;
            }
        }
    }
}
QVariantMap QmlProfilerRangeModel::details(int index) const
{
    QVariantMap result;
    int id = selectionId(index);
    const QVector<QmlProfilerDataModel::QmlEventTypeData> &types =
            modelManager()->qmlModel()->getEventTypes();

    result.insert(QStringLiteral("displayName"),
                  tr(QmlProfilerModelManager::featureName(mainFeature())));
    result.insert(tr("Duration"), QmlProfilerBaseModel::formatTime(duration(index)));

    result.insert(tr("Details"), types[id].data);
    result.insert(tr("Location"), types[id].displayName);
    return result;
}
int QmlProfilerRangeModel::selectionIdForLocation(const QString &filename, int line, int column) const
{
    // if this is called from v8 view, we don't have the column number, it will be -1
    const QVector<QmlProfilerDataModel::QmlEventTypeData> &types =
            modelManager()->qmlModel()->getEventTypes();
    for (int i = 1; i < expandedRowCount(); ++i) {
        int typeId = m_expandedRowTypes[i];
        const QmlProfilerDataModel::QmlEventTypeData &eventData = types[typeId];
        if (eventData.location.filename == filename &&
                eventData.location.line == line &&
                (column == -1 || eventData.location.column == column))
            return typeId;
    }
    return -1;
}
QVariantList QmlProfilerRangeModel::labels() const
{
    QVariantList result;

    const QVector<QmlProfilerDataModel::QmlEventTypeData> &types =
            modelManager()->qmlModel()->getEventTypes();
    for (int i = 1; i < expandedRowCount(); i++) { // Ignore the -1 for the first row
        QVariantMap element;
        int typeId = m_expandedRowTypes[i];
        element.insert(QLatin1String("displayName"), QVariant(types[typeId].displayName));
        element.insert(QLatin1String("description"), QVariant(types[typeId].data));
        element.insert(QLatin1String("id"), QVariant(typeId));
        result << element;
    }

    return result;
}
Exemplo n.º 13
0
QVariantList QmlProfilerRangeModel::labels() const
{
    QVariantList result;

    const QmlProfilerModelManager *manager = modelManager();
    for (int i = 1; i < expandedRowCount(); i++) { // Ignore the -1 for the first row
        QVariantMap element;
        const int typeId = m_expandedRowTypes[i];
        const QmlEventType &type = manager->eventType(typeId);
        element.insert(QLatin1String("displayName"), type.displayName());
        element.insert(QLatin1String("description"), type.data());
        element.insert(QLatin1String("id"), typeId);
        result << element;
    }

    return result;
}
QVariantMap QmlProfilerTimelineModel::locationFromTypeId(int index) const
{
    QVariantMap result;
    int id = typeId(index);
    if (id < 0)
        return result;

    auto types = modelManager()->qmlModel()->eventTypes();
    if (id >= types.length())
        return result;

    QmlEventLocation location = types.at(id).location();

    result.insert(QStringLiteral("file"), location.filename());
    result.insert(QStringLiteral("line"), location.line());
    result.insert(QStringLiteral("column"), location.column());

    return result;
}
Exemplo n.º 15
0
/* Ultimately there is no way to know which cache entry a given event refers to as long as we only
 * receive the pixmap URL from the application. Multiple copies of different sizes may be cached
 * for each URL. However, we can apply some heuristics to make the result somewhat plausible by
 * using the following assumptions:
 *
 * - PixmapSizeKnown will happen at most once for every cache entry.
 * - PixmapSizeKnown cannot happen for entries with PixmapLoadingError and vice versa.
 * - PixmapCacheCountChanged can happen for entries with PixmapLoadingError but doesn't have to.
 * - Decreasing PixmapCacheCountChanged events can only happen for entries that have seen an
 *   increasing PixmapCacheCountChanged (but that may have happened before the trace).
 * - PixmapCacheCountChanged can happen before or after PixmapSizeKnown.
 * - For every PixmapLoadingFinished or PixmapLoadingError there is exactly one
 *   PixmapLoadingStarted event, but it may be before the trace.
 * - For every PixmapLoadingStarted there is exactly one PixmapLoadingFinished or
 *   PixmapLoadingError, but it may be after the trace.
 * - Decreasing PixmapCacheCountChanged events in the presence of corrupt cache entries are more
 *   likely to clear those entries than other, correctly loaded ones.
 * - Increasing PixmapCacheCountChanged events are more likely to refer to correctly loaded entries
 *   than to ones with PixmapLoadingError.
 * - PixmapLoadingFinished and PixmapLoadingError are more likely to refer to cache entries that
 *   have seen a PixmapLoadingStarted than to ones that haven't.
 *
 * For each URL we keep an ordered list of pixmaps possibly being loaded and assign new events to
 * the first entry that "fits". If multiple sizes of the same pixmap are being loaded concurrently
 * we generally assume that the PixmapLoadingFinished and PixmapLoadingError events occur in the
 * order we learn about the existence of these sizes, subject to the above constraints. This is not
 * necessarily the order the pixmaps are really loaded but it's the best we can do with the given
 * information. If they're loaded sequentially the representation is correct.
 */
void PixmapCacheModel::loadEvent(const QmlEvent &event, const QmlEventType &type)
{
    PixmapCacheItem newEvent;
    const PixmapEventType pixmapType = static_cast<PixmapEventType>(type.detailType());
    newEvent.pixmapEventType = pixmapType;
    qint64 pixmapStartTime = event.timestamp();

    newEvent.urlIndex = -1;
    for (auto i = m_pixmaps.cend(), begin = m_pixmaps.cbegin(); i != begin;) {
        if ((--i)->url == type.location().filename()) {
            newEvent.urlIndex = i - m_pixmaps.cbegin();
            break;
        }
    }

    newEvent.sizeIndex = -1;
    if (newEvent.urlIndex == -1) {
        newEvent.urlIndex = m_pixmaps.count();
        m_pixmaps << Pixmap(type.location().filename());
    }

    Pixmap &pixmap = m_pixmaps[newEvent.urlIndex];
    switch (pixmapType) {
    case PixmapSizeKnown: {// pixmap size
        // Look for pixmaps for which we don't know the size, yet and which have actually been
        // loaded.
        for (auto i = pixmap.sizes.begin(), end = pixmap.sizes.end(); i != end; ++i) {
            if (i->size.isValid() || i->cacheState == Uncacheable || i->cacheState == Corrupt)
                continue;

            // We can't have cached it before we knew the size
            Q_ASSERT(i->cacheState != Cached);

            i->size.setWidth(event.number<qint32>(0));
            i->size.setHeight(event.number<qint32>(1));
            newEvent.sizeIndex = i - pixmap.sizes.begin();
            break;
        }

        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState(event.number<qint32>(0), event.number<qint32>(1));
        }

        PixmapState &state = pixmap.sizes[newEvent.sizeIndex];
        if (state.cacheState == ToBeCached) {
            m_lastCacheSizeEvent = updateCacheCount(m_lastCacheSizeEvent, pixmapStartTime,
                                          state.size.width() * state.size.height(), newEvent,
                                          event.typeIndex());
            state.cacheState = Cached;
        }
        break;
    }
    case PixmapCacheCountChanged: {// Cache Size Changed Event
        bool uncache = m_cumulatedCount > event.number<qint32>(2);
        m_cumulatedCount = event.number<qint32>(2);
        qint64 pixSize = 0;

        // First try to find a preferred pixmap, which either is Corrupt and will be uncached
        // or is uncached and will be cached.
        for (auto i = pixmap.sizes.begin(), end = pixmap.sizes.end(); i != end; ++i) {
            if (uncache && i->cacheState == Corrupt) {
                newEvent.sizeIndex = i - pixmap.sizes.begin();
                i->cacheState = Uncacheable;
                break;
            } else if (!uncache && i->cacheState == Uncached) {
                newEvent.sizeIndex = i - pixmap.sizes.begin();
                if (i->size.isValid()) {
                    pixSize = i->size.width() * i->size.height();
                    i->cacheState = Cached;
                } else {
                    i->cacheState = ToBeCached;
                }
                break;
            }
        }

        // If none found, check for cached or ToBeCached pixmaps that shall be uncached or
        // Error pixmaps that become corrupt cache entries. We also accept Initial to be
        // uncached as we may have missed the matching PixmapCacheCountChanged that cached it.
        if (newEvent.sizeIndex == -1) {
            for (auto i = pixmap.sizes.begin(), end = pixmap.sizes.end(); i != end; ++i) {
                if (uncache && (i->cacheState == Cached || i->cacheState == ToBeCached)) {
                    newEvent.sizeIndex = i - pixmap.sizes.begin();
                    if (i->size.isValid())
                        pixSize = -i->size.width() * i->size.height();
                    i->cacheState = Uncached;
                    break;
                } else if (!uncache && i->cacheState == Uncacheable) {
                    // A pixmap can repeatedly be cached, become corrupt, and the be uncached again.
                    newEvent.sizeIndex = i - pixmap.sizes.begin();
                    i->cacheState = Corrupt;
                    break;
                }
            }
        }

        // If that does't work, create a new entry.
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState(uncache ? Uncached : ToBeCached);
            // now the size is 0. Thus, there is no point in updating the size row.
        } else if (pixSize != 0) {
            m_lastCacheSizeEvent = updateCacheCount(m_lastCacheSizeEvent, pixmapStartTime, pixSize,
                                                    newEvent, event.typeIndex());
        }
        break;
    }
    case PixmapLoadingStarted: { // Load
        // Look for a pixmap that hasn't been started, yet. There may have been a refcount
        // event, which we ignore.
        for (auto i = pixmap.sizes.cbegin(), end = pixmap.sizes.cend(); i != end; ++i) {
            if (i->loadState == Initial) {
                newEvent.sizeIndex = i - pixmap.sizes.cbegin();
                break;
            }
        }
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState();
        }

        PixmapState &state = pixmap.sizes[newEvent.sizeIndex];
        state.loadState = Loading;
        newEvent.typeId = event.typeIndex();
        state.started = insertStart(pixmapStartTime, newEvent.urlIndex + 1);
        m_data.insert(state.started, newEvent);
        break;
    }
    case PixmapLoadingFinished:
    case PixmapLoadingError: {
        // First try to find one that has already started
        for (auto i = pixmap.sizes.cbegin(), end = pixmap.sizes.cend(); i != end; ++i) {
            if (i->loadState != Loading)
                continue;
            // Pixmaps with known size cannot be errors and vice versa
            if (pixmapType == PixmapLoadingError && i->size.isValid())
                continue;

            newEvent.sizeIndex = i - pixmap.sizes.cbegin();
            break;
        }

        // If none was found use any other compatible one
        if (newEvent.sizeIndex == -1) {
            for (auto i = pixmap.sizes.cbegin(), end = pixmap.sizes.cend(); i != end; ++i) {
                if (i->loadState != Initial)
                    continue;
                // Pixmaps with known size cannot be errors and vice versa
                if (pixmapType == PixmapLoadingError && i->size.isValid())
                    continue;

                newEvent.sizeIndex = i - pixmap.sizes.cbegin();
                break;
            }
        }

        // If again none was found, create one.
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState();
        }

        PixmapState &state = pixmap.sizes[newEvent.sizeIndex];
        // If the pixmap loading wasn't started, start it at traceStartTime()
        if (state.loadState == Initial) {
            newEvent.pixmapEventType = PixmapLoadingStarted;
            newEvent.typeId = event.typeIndex();
            qint64 traceStart = modelManager()->traceTime()->startTime();
            state.started = insert(traceStart, pixmapStartTime - traceStart,
                                   newEvent.urlIndex + 1);
            m_data.insert(state.started, newEvent);

            // All other indices are wrong now as we've prepended. Fix them ...
            if (m_lastCacheSizeEvent >= state.started)
                ++m_lastCacheSizeEvent;

            for (int pixmapIndex = 0; pixmapIndex < m_pixmaps.count(); ++pixmapIndex) {
                Pixmap &brokenPixmap = m_pixmaps[pixmapIndex];
                for (int sizeIndex = 0; sizeIndex < brokenPixmap.sizes.count(); ++sizeIndex) {
                    PixmapState &brokenSize = brokenPixmap.sizes[sizeIndex];
                    if ((pixmapIndex != newEvent.urlIndex || sizeIndex != newEvent.sizeIndex) &&
                            brokenSize.started >= state.started) {
                        ++brokenSize.started;
                    }
                }
            }
        } else {
            insertEnd(state.started, pixmapStartTime - startTime(state.started));
        }

        if (pixmapType == PixmapLoadingError) {
            state.loadState = Error;
            switch (state.cacheState) {
            case Uncached:
                state.cacheState = Uncacheable;
                break;
            case ToBeCached:
                state.cacheState = Corrupt;
                break;
            default:
                // Cached cannot happen as size would have to be known and Corrupt or
                // Uncacheable cannot happen as we only accept one finish or error event per
                // pixmap.
                Q_UNREACHABLE();
            }
        } else {
            state.loadState = Finished;
        }
        break;
    }
    default:
        break;
    }
}
Exemplo n.º 16
0
void MergedModelFactory_cl::MergeModel()
{
  VASSERT(m_iModelsToMerge > 1 && m_iModelsToMerge < 11);
  DeleteModels();

  // Always use the body
  MeshMergeInfo_t info[32]; // maximum count
  info[0].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Body.model", true);

  int i = 1;

  // Armor
  if (m_bArmArmor)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Arm.model", true);
  if (m_bShoulderArmor)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Shoulder.model", true);
  if (m_bLegsArmor)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Legs.model", true);
  if (m_bKneeArmor)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Knee.model", true);
  if (m_bAccessoire)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Accessoire.model", true);
  if (m_bBelt)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Belt.model", true);
  if (m_bCloth)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Cloth.model", true);
  if (m_bBeard)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Beard.model", true);

  // Weapon
  if (m_bAxe)
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Axe.model", true);
  else
    info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Sword.model", true);

  // Setup model
  m_pMergedModelEntity = (KeyControlledTransitionBarbarian_cl *) Vision::Game.CreateEntity("KeyControlledTransitionBarbarian_cl", m_vPos, "Barbarian_Body.model");

  if (m_iModelsToMerge > 1)
  {
    // Merge model
    VMeshManager &modelManager(VDynamicMesh::GetResourceManager());
    VDynamicMesh *pMergedModel = modelManager.MergeDynamicMeshes("MergedModel", info, m_iModelsToMerge);
    VTransitionStateMachine* pTransitionStateMachine = m_pMergedModelEntity->Components().GetComponentOfType<VTransitionStateMachine>();
    if(pTransitionStateMachine != NULL)
      m_pMergedModelEntity->RemoveComponent(pTransitionStateMachine);

    m_pMergedModelEntity->SetMesh(pMergedModel);

    // Load sequence set
    VisAnimSequenceSet_cl *pSet = Vision::Animations.GetSequenceSetManager()->LoadAnimSequenceSet("Barbarian_Body.anim");
    m_pMergedModelEntity->GetMesh()->GetSequenceSetCollection()->Add(pSet);

    if(pTransitionStateMachine != NULL)
      m_pMergedModelEntity->AddComponent(pTransitionStateMachine);
  }

  // Init entity
  m_pMergedModelEntity->InitFunction();
  m_pMergedModelEntity->SetPosition(m_vPos);
  m_pMergedModelEntity->SetOrientation(m_vOri);
  m_pCameraEntity->AttachToParent(m_pMergedModelEntity);
  m_pPlayerCamera->ResetOldPosition();
  m_pPlayerCamera->Follow = false;
  m_pPlayerCamera->Zoom = true;
}
Exemplo n.º 17
0
int main(int argc, char** argv) {
  ////////////////////////////////////////////////
  BEGIN_PARAMETER_LIST(pl)
  ADD_PARAMETER_GROUP(pl, "Basic Input/Output")
  ADD_STRING_PARAMETER(pl, inVcf, "--inVcf", "Input VCF File")
  ADD_STRING_PARAMETER(pl, outPrefix, "--out", "Output prefix")
  ADD_BOOL_PARAMETER(pl, outputRaw, "--outputRaw",
                     "Output genotypes, phenotype, covariates(if any) and "
                     "collapsed genotype to tabular files")

  ADD_PARAMETER_GROUP(pl, "Specify Covariate")
  ADD_STRING_PARAMETER(pl, cov, "--covar", "Specify covariate file")
  ADD_STRING_PARAMETER(
      pl, covName, "--covar-name",
      "Specify the column name in covariate file to be included in analysis")
  ADD_BOOL_PARAMETER(pl, sex, "--sex",
                     "Include sex (5th column in the PED file) as a covariate")

  ADD_PARAMETER_GROUP(pl, "Specify Phenotype")
  ADD_STRING_PARAMETER(pl, pheno, "--pheno", "Specify phenotype file")
  ADD_BOOL_PARAMETER(pl, inverseNormal, "--inverseNormal",
                     "Transform phenotype like normal distribution")
  ADD_BOOL_PARAMETER(
      pl, useResidualAsPhenotype, "--useResidualAsPhenotype",
      "Fit covariate ~ phenotype, use residual to replace phenotype")
  ADD_STRING_PARAMETER(pl, mpheno, "--mpheno",
                       "Specify which phenotype column to read (default: 1)")
  ADD_STRING_PARAMETER(pl, phenoName, "--pheno-name",
                       "Specify which phenotype column to read by header")
  ADD_BOOL_PARAMETER(pl, qtl, "--qtl", "Treat phenotype as quantitative trait")
  ADD_STRING_PARAMETER(
      pl, multiplePheno, "--multiplePheno",
      "Specify aa template file for analyses of more than one phenotype")

  ADD_PARAMETER_GROUP(pl, "Specify Genotype")
  ADD_STRING_PARAMETER(pl, dosageTag, "--dosage",
                       "Specify which dosage tag to use. (e.g. EC or DS)")

  ADD_PARAMETER_GROUP(pl, "Chromosome X Options")
  ADD_STRING_PARAMETER(pl, xLabel, "--xLabel",
                       "Specify X chromosome label (default: 23|X)")
  ADD_STRING_PARAMETER(pl, xParRegion, "--xParRegion",
                       "Specify PAR region (default: hg19), can be build "
                       "number e.g. hg38, b37; or specify region, e.g. "
                       "'60001-2699520,154931044-155260560'")

  ADD_PARAMETER_GROUP(pl, "People Filter")
  ADD_STRING_PARAMETER(pl, peopleIncludeID, "--peopleIncludeID",
                       "List IDs of people that will be included in study")
  ADD_STRING_PARAMETER(
      pl, peopleIncludeFile, "--peopleIncludeFile",
      "From given file, set IDs of people that will be included in study")
  ADD_STRING_PARAMETER(pl, peopleExcludeID, "--peopleExcludeID",
                       "List IDs of people that will be included in study")
  ADD_STRING_PARAMETER(
      pl, peopleExcludeFile, "--peopleExcludeFile",
      "From given file, set IDs of people that will be included in study")

  ADD_PARAMETER_GROUP(pl, "Site Filter")
  ADD_STRING_PARAMETER(
      pl, rangeList, "--rangeList",
      "Specify some ranges to use, please use chr:begin-end format.")
  ADD_STRING_PARAMETER(
      pl, rangeFile, "--rangeFile",
      "Specify the file containing ranges, please use chr:begin-end format.")
  ADD_STRING_PARAMETER(pl, siteFile, "--siteFile",
                       "Specify the file containing sites to include, please "
                       "use \"chr pos\" format.")
  ADD_INT_PARAMETER(
      pl, siteDepthMin, "--siteDepthMin",
      "Specify minimum depth(inclusive) to be included in analysis")
  ADD_INT_PARAMETER(
      pl, siteDepthMax, "--siteDepthMax",
      "Specify maximum depth(inclusive) to be included in analysis")
  ADD_INT_PARAMETER(pl, siteMACMin, "--siteMACMin",
                    "Specify minimum Minor Allele Count(inclusive) to be "
                    "included in analysis")
  ADD_STRING_PARAMETER(pl, annoType, "--annoType",
                       "Specify annotation type that is followed by ANNO= in "
                       "the VCF INFO field, regular expression is allowed ")

  ADD_PARAMETER_GROUP(pl, "Genotype Filter")
  ADD_INT_PARAMETER(
      pl, indvDepthMin, "--indvDepthMin",
      "Specify minimum depth(inclusive) of a sample to be included in analysis")
  ADD_INT_PARAMETER(
      pl, indvDepthMax, "--indvDepthMax",
      "Specify maximum depth(inclusive) of a sample to be included in analysis")
  ADD_INT_PARAMETER(
      pl, indvQualMin, "--indvQualMin",
      "Specify minimum depth(inclusive) of a sample to be included in analysis")

  ADD_PARAMETER_GROUP(pl, "Association Model")
  ADD_STRING_PARAMETER(pl, modelSingle, "--single",
                       "Single variant tests, choose from: score, wald, exact, "
                       "famScore, famLrt, famGrammarGamma, firth")
  ADD_STRING_PARAMETER(pl, modelBurden, "--burden",
                       "Burden tests, choose from: cmc, zeggini, mb, exactCMC, "
                       "rarecover, cmat, cmcWald")
  ADD_STRING_PARAMETER(pl, modelVT, "--vt",
                       "Variable threshold tests, choose from: price, analytic")
  ADD_STRING_PARAMETER(
      pl, modelKernel, "--kernel",
      "Kernal-based tests, choose from: SKAT, KBAC, FamSKAT, SKATO")
  ADD_STRING_PARAMETER(pl, modelMeta, "--meta",
                       "Meta-analysis related functions to generate summary "
                       "statistics, choose from: score, cov, dominant, "
                       "recessive")

  ADD_PARAMETER_GROUP(pl, "Family-based Models")
  ADD_STRING_PARAMETER(pl, kinship, "--kinship",
                       "Specify a kinship file for autosomal analysis, use "
                       "vcf2kinship to generate")
  ADD_STRING_PARAMETER(pl, xHemiKinship, "--xHemiKinship",
                       "Provide kinship for the chromosome X hemizygote region")
  ADD_STRING_PARAMETER(pl, kinshipEigen, "--kinshipEigen",
                       "Specify eigen decomposition results of a kinship file "
                       "for autosomal analysis")
  ADD_STRING_PARAMETER(
      pl, xHemiKinshipEigen, "--xHemiKinshipEigen",
      "Specify eigen decomposition results of a kinship file for X analysis")

  ADD_PARAMETER_GROUP(pl, "Grouping Unit ")
  ADD_STRING_PARAMETER(pl, geneFile, "--geneFile",
                       "Specify a gene file (for burden tests)")
  ADD_STRING_PARAMETER(pl, gene, "--gene", "Specify which genes to test")
  ADD_STRING_PARAMETER(pl, setList, "--setList",
                       "Specify a list to test (for burden tests)")
  ADD_STRING_PARAMETER(pl, setFile, "--setFile",
                       "Specify a list file (for burden tests, first 2 "
                       "columns: setName chr:beg-end)")
  ADD_STRING_PARAMETER(pl, set, "--set",
                       "Specify which set to test (1st column)")

  ADD_PARAMETER_GROUP(pl, "Frequency Cutoff")
  /*ADD_BOOL_PARAMETER(pl, freqFromFile, "--freqFromFile", "Obtain frequency
   * from external file")*/
  // ADD_BOOL_PARAMETER(pl, freqFromControl, "--freqFromControl", "Calculate
  // frequency from case samples")
  ADD_DOUBLE_PARAMETER(
      pl, freqUpper, "--freqUpper",
      "Specify upper minor allele frequency bound to be included in analysis")
  ADD_DOUBLE_PARAMETER(
      pl, freqLower, "--freqLower",
      "Specify lower minor allele frequency bound to be included in analysis")

  ADD_PARAMETER_GROUP(pl, "Missing Data")
  ADD_STRING_PARAMETER(
      pl, impute, "--impute",
      "Impute missing genotype (default:mean):  mean, hwe, and drop")
  ADD_BOOL_PARAMETER(
      pl, imputePheno, "--imputePheno",
      "Impute phenotype to mean of those have genotypes but no phenotypes")
  ADD_BOOL_PARAMETER(pl, imputeCov, "--imputeCov",
                     "Impute each covariate to its mean, instead of drop "
                     "samples with missing covariates")

  ADD_PARAMETER_GROUP(pl, "Conditional Analysis")
  ADD_STRING_PARAMETER(pl, condition, "--condition",
                       "Specify markers to be conditions (specify range)")

  ADD_PARAMETER_GROUP(pl, "Auxiliary Functions")
  ADD_BOOL_PARAMETER(pl, noweb, "--noweb", "Skip checking new version")
  ADD_BOOL_PARAMETER(pl, help, "--help", "Print detailed help message")
  END_PARAMETER_LIST(pl);

  pl.Read(argc, argv);

  if (FLAG_help) {
    pl.Help();
    return 0;
  }

  welcome();
  pl.Status();
  if (FLAG_REMAIN_ARG.size() > 0) {
    fprintf(stderr, "Unparsed arguments: ");
    for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) {
      fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str());
    }
    exit(1);
  }

  if (!FLAG_outPrefix.size()) FLAG_outPrefix = "rvtest";

  REQUIRE_STRING_PARAMETER(FLAG_inVcf,
                           "Please provide input file using: --inVcf");

  // check new version
  if (!FLAG_noweb) {
    VersionChecker ver;
    if (ver.retrieveRemoteVersion("http://zhanxw.com/rvtests/version") < 0) {
      fprintf(stderr,
              "Retrieve remote version failed, use '--noweb' to skip.\n");
    } else {
      ver.setLocalVersion(VERSION);
      if (ver.isRemoteVersionNewer()) {
        fprintf(stderr, "New version of rvtests is available:");
        ver.printRemoteContent();
      }
    }
  }

  // start logging
  Logger _logger((FLAG_outPrefix + ".log").c_str());
  logger = &_logger;
  logger->info("Program version: %s", VERSION);
  logger->infoToFile("Git Version: %s", GIT_VERSION);
  logger->infoToFile("Parameters BEGIN");
  pl.WriteToFile(logger->getHandle());
  logger->infoToFile("Parameters END");
  logger->sync();

  // start analysis
  time_t startTime = time(0);
  logger->info("Analysis started at: %s", currentTime().c_str());

  GenotypeExtractor ge(FLAG_inVcf);

  // set range filters here
  ge.setRangeList(FLAG_rangeList.c_str());
  ge.setRangeFile(FLAG_rangeFile.c_str());

  // set people filters here
  if (FLAG_peopleIncludeID.size() || FLAG_peopleIncludeFile.size()) {
    ge.excludeAllPeople();
    ge.includePeople(FLAG_peopleIncludeID.c_str());
    ge.includePeopleFromFile(FLAG_peopleIncludeFile.c_str());
  }
  ge.excludePeople(FLAG_peopleExcludeID.c_str());
  ge.excludePeopleFromFile(FLAG_peopleExcludeFile.c_str());

  if (FLAG_siteDepthMin > 0) {
    ge.setSiteDepthMin(FLAG_siteDepthMin);
    logger->info("Set site depth minimum to %d", FLAG_siteDepthMin);
  }
  if (FLAG_siteDepthMax > 0) {
    ge.setSiteDepthMax(FLAG_siteDepthMax);
    logger->info("Set site depth maximum to %d", FLAG_siteDepthMax);
  }
  if (FLAG_siteMACMin > 0) {
    ge.setSiteMACMin(FLAG_siteMACMin);
    logger->info("Set site minimum MAC to %d", FLAG_siteDepthMin);
  }
  if (FLAG_annoType != "") {
    ge.setAnnoType(FLAG_annoType.c_str());
    logger->info("Set annotype type filter to %s", FLAG_annoType.c_str());
  }

  std::vector<std::string> vcfSampleNames;
  ge.getPeopleName(&vcfSampleNames);
  logger->info("Loaded [ %zu ] samples from VCF files", vcfSampleNames.size());

  DataLoader dataLoader;
  dataLoader.setPhenotypeImputation(FLAG_imputePheno);
  dataLoader.setCovariateImputation(FLAG_imputeCov);

  if (FLAG_multiplePheno.empty()) {
    dataLoader.loadPhenotype(FLAG_pheno, FLAG_mpheno, FLAG_phenoName);

    // // load phenotypes
    // std::map<std::string, double> phenotype;
    // if (FLAG_pheno.empty()) {
    //   logger->error("Cannot do association when phenotype is missing!");
    //   return -1;
    // }

    // // check if alternative phenotype columns are used
    // if (!FLAG_mpheno.empty() && !FLAG_phenoName.empty()) {
    //   logger->error("Please specify either --mpheno or --pheno-name");
    //   return -1;
    // }
    // if (!FLAG_mpheno.empty()) {
    //   int col = atoi(FLAG_mpheno);
    //   int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype,
    //   col);
    //   if (ret < 0) {
    //     logger->error("Loading phenotype failed!");
    //     return -1;
    //   }
    // } else if (!FLAG_phenoName.empty()) {
    //   int ret = loadPedPhenotypeByHeader(FLAG_pheno.c_str(), &phenotype,
    //                                      FLAG_phenoName.c_str());
    //   if (ret < 0) {
    //     logger->error("Loading phenotype failed!");
    //     return -1;
    //   }
    // } else {
    //   int col = 1;  // default use the first phenotype
    //   int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype,
    //   col);
    //   if (ret < 0) {
    //     logger->error("Loading phenotype failed!");
    //     return -1;
    //   }
    // }
    // logger->info("Loaded [ %zu ] sample pheontypes.", phenotype.size());

    // rearrange phenotypes
    // drop samples from phenotype or vcf
    matchPhenotypeAndVCF("missing phenotype", &dataLoader, &ge);

    // // phenotype names (vcf sample names) arranged in the same order as in
    // VCF
    // std::vector<std::string> phenotypeNameInOrder;
    // std::vector<double>
    //     phenotypeInOrder;  // phenotype arranged in the same order as in VCF
    // rearrange(phenotype, vcfSampleNames, &vcfSampleToDrop,
    // &phenotypeNameInOrder,
    //           &phenotypeInOrder, FLAG_imputePheno);
    // if (vcfSampleToDrop.size()) {
    //   // exclude this sample from parsing VCF
    //   ge.excludePeople(vcfSampleToDrop);
    //   // output dropped samples
    //   for (size_t i = 0; i < vcfSampleToDrop.size(); ++i) {
    //     if (i == 0)
    //       logger->warn(
    //           "Total [ %zu ] samples are dropped from VCF file due to missing
    //           "
    //           "phenotype",
    //           vcfSampleToDrop.size());
    //     if (i >= 10) {
    //       logger->warn(
    //           "Skip outputting additional [ %d ] samples with missing "
    //           "phenotypes.",
    //           ((int)vcfSampleToDrop.size() - 10));
    //       break;
    //     }
    //     logger->warn("Drop sample [ %s ] from VCF file due to missing
    //     phenotype",
    //                  (vcfSampleToDrop)[i].c_str());
    //   }
    //   // logger->warn("Drop %zu sample from VCF file since we don't have
    //   their
    //   // phenotypes", vcfSampleToDrop.size());
    // }
    // if (phenotypeInOrder.size() != phenotype.size()) {
    //   logger->warn(
    //       "Drop [ %d ] samples from phenotype file due to missing genotypes
    //       from "
    //       "VCF files",
    //       (int)(phenotype.size() - phenotypeInOrder.size()));
    //   // We may output these samples by comparing keys of phenotype and
    //   // phenotypeNameInOrder
    // }
    dataLoader.loadCovariate(FLAG_cov, FLAG_covName);
    matchCovariateAndVCF("missing covariate", &dataLoader, &ge);

    // // load covariate
    // Matrix covariate;
    // HandleMissingCov handleMissingCov = COVARIATE_DROP;
    // if (FLAG_imputeCov) {
    //   handleMissingCov = COVARIATE_IMPUTE;
    // }
    // if (FLAG_cov.empty() && !FLAG_covName.empty()) {
    //   logger->info("Use phenotype file as covariate file [ %s ]",
    //                FLAG_pheno.c_str());
    //   FLAG_cov = FLAG_pheno;
    // }
    // if (!FLAG_cov.empty()) {
    //   logger->info("Begin to read covariate file.");
    //   std::vector<std::string> columnNamesInCovariate;
    //   std::set<std::string> sampleToDropInCovariate;
    //   int ret = loadCovariate(FLAG_cov.c_str(), phenotypeNameInOrder,
    //                           FLAG_covName.c_str(), handleMissingCov,
    //                           &covariate,
    //                           &columnNamesInCovariate,
    //                           &sampleToDropInCovariate);
    //   if (ret < 0) {
    //     logger->error("Load covariate file failed !");
    //     exit(1);
    //   }

    //   // drop phenotype samples
    //   if (!sampleToDropInCovariate.empty()) {
    //     int idx = 0;
    //     int n = phenotypeNameInOrder.size();
    //     for (int i = 0; i < n; ++i) {
    //       if (sampleToDropInCovariate.count(phenotypeNameInOrder[i]) !=
    //           0) {  // need to drop
    //         continue;
    //       }
    //       phenotypeNameInOrder[idx] = phenotypeNameInOrder[i];
    //       phenotypeInOrder[idx] = phenotypeInOrder[i];
    //       idx++;
    //     }
    //     phenotypeNameInOrder.resize(idx);
    //     phenotypeInOrder.resize(idx);
    //     logger->warn(
    //         "[ %zu ] sample phenotypes are dropped due to lacking
    //         covariates.",
    //         sampleToDropInCovariate.size());
    //   }
    //   // drop vcf samples;
    //   for (std::set<std::string>::const_iterator iter =
    //            sampleToDropInCovariate.begin();
    //        iter != sampleToDropInCovariate.end(); ++iter) {
    //     ge.excludePeople(iter->c_str());
    //   }
    // }
  } else {
    dataLoader.loadMultiplePhenotype(FLAG_multiplePheno, FLAG_pheno, FLAG_cov);

    matchPhenotypeAndVCF("missing phenotype", &dataLoader, &ge);
    matchCovariateAndVCF("missing covariate", &dataLoader, &ge);
  }

  dataLoader.loadSex();
  if (FLAG_sex) {
    dataLoader.useSexAsCovariate();
    matchCovariateAndVCF("missing sex", &dataLoader, &ge);
  }
  // // load sex
  // std::vector<int> sex;
  // if (loadSex(FLAG_pheno, phenotypeNameInOrder, &sex)) {
  //   logger->error("Cannot load sex of samples from phenotype file");
  //   exit(1);
  // }

  // if (FLAG_sex) {            // append sex in covariate
  //   std::vector<int> index;  // mark missing samples
  //   int numMissing = findMissingSex(sex, &index);
  //   logger->info("Futher exclude %d samples with missing sex", numMissing);
  //   removeByIndex(index, &sex);
  //   excludeSamplesByIndex(index, &ge, &phenotypeNameInOrder,
  //   &phenotypeInOrder,
  //                         &covariate);
  //   appendToMatrix("Sex", sex, &covariate);
  // }

  if (!FLAG_condition.empty()) {
    dataLoader.loadMarkerAsCovariate(FLAG_inVcf, FLAG_condition);
    matchCovariateAndVCF("missing in conditioned marker(s)", &dataLoader, &ge);
  }
  // // load conditional markers
  // if (!FLAG_condition.empty()) {
  //   Matrix geno;
  //   std::vector<std::string> rowLabel;
  //   if (loadMarkerFromVCF(FLAG_inVcf, FLAG_condition, &rowLabel, &geno) < 0)
  //   {
  //     logger->error("Load conditional markers [ %s ] from [ %s ] failed.",
  //                   FLAG_condition.c_str(), FLAG_inVcf.c_str());
  //     exit(1);
  //   }
  //   if (appendGenotype(&covariate, phenotypeNameInOrder, geno, rowLabel) < 0)
  //   {
  //     logger->error(
  //         "Failed to combine conditional markers [ %s ] from [ %s ] failed.",
  //         FLAG_condition.c_str(), FLAG_inVcf.c_str());
  //     exit(1);
  //   }
  // }

  dataLoader.checkConstantCovariate();
  // // check if some covariates are constant for all samples
  // // e.g. user may include covariate "1" in addition to intercept
  // //      in such case, we will give a fatal error
  // for (int i = 0; i < covariate.cols; ++i) {
  //   std::set<double> s;
  //   s.clear();
  //   for (int j = 0; j < covariate.rows; ++j) {
  //     s.insert(covariate[j][i]);
  //   }
  //   if (s.size() == 1) {
  //     logger->error(
  //         "Covariate [ %s ] equals [ %g ] for all samples, cannot fit "
  //         "model...\n",
  //         covariate.GetColumnLabel(i), *s.begin());
  //     exit(1);
  //   }
  // }

  g_SummaryHeader = new SummaryHeader;
  g_SummaryHeader->recordCovariate(dataLoader.getCovariate());

  // record raw phenotype
  g_SummaryHeader->recordPhenotype("Trait",
                                   dataLoader.getPhenotype().extractCol(0));

  // adjust phenotype
  // bool binaryPhenotype;
  if (FLAG_qtl) {
    // binaryPhenotype = false;
    dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL);
    logger->info("-- Force quantitative trait mode -- ");
  } else {
    if (dataLoader.detectPhenotypeType() == DataLoader::PHENOTYPE_BINARY) {
      logger->warn("-- Enabling binary phenotype mode -- ");
      dataLoader.setTraitType(DataLoader::PHENOTYPE_BINARY);

    } else {
      dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL);
    }
    // binaryPhenotype = isBinaryPhenotype(phenotypeInOrder);
    // if (binaryPhenotype) {
    //   logger->warn("-- Enabling binary phenotype mode -- ");
    //   convertBinaryPhenotype(&phenotypeInOrder);
    // }
  }

  if (FLAG_useResidualAsPhenotype) {
    dataLoader.useResidualAsPhenotype();
    g_SummaryHeader->recordEstimation(dataLoader.getEstimation());
  }
  // // use residual as phenotype
  // if (FLAG_useResidualAsPhenotype) {
  //   if (binaryPhenotype) {
  //     logger->warn(
  //         "WARNING: Skip transforming binary phenotype, although you want to
  //         "
  //         "use residual as phenotype!");
  //   } else {
  //     if (covariate.cols > 0) {
  //       LinearRegression lr;
  //       Vector pheno;
  //       Matrix covAndInt;
  //       copy(phenotypeInOrder, &pheno);
  //       copyCovariateAndIntercept(covariate.rows, covariate, &covAndInt);
  //       if (!lr.FitLinearModel(covAndInt, pheno)) {
  //         logger->error(
  //             "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the
  //             "
  //             "original phenotype");
  //       } else {
  //         const int n = lr.GetResiduals().Length();
  //         for (int i = 0; i < n; ++i) {
  //           phenotypeInOrder[i] = lr.GetResiduals()[i];
  //         }
  //         covariate.Dimension(0, 0);
  //         logger->info(
  //             "DONE: Fit model [ phenotype ~ 1 + covariates ] and model "
  //             "residuals will be used as responses.");
  //       }
  //     } else {  // no covaraites
  //       centerVector(&phenotypeInOrder);
  //       logger->info("DONE: Use residual as phenotype by centerng it");
  //     }
  //   }
  // }

  if (FLAG_inverseNormal) {
    dataLoader.inverseNormalizePhenotype();
    g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal);
  }
  // // phenotype transformation
  // if (FLAG_inverseNormal) {
  //   if (binaryPhenotype) {
  //     logger->warn(
  //         "WARNING: Skip transforming binary phenotype, although you required
  //         "
  //         "inverse normalization!");
  //   } else {
  //     logger->info("Now applying inverse normalize transformation.");
  //     inverseNormalizeLikeMerlin(&phenotypeInOrder);
  //     g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal);
  //     logger->info("DONE: inverse normal transformation finished.");
  //   }
  // }

  g_SummaryHeader->recordPhenotype("AnalyzedTrait",
                                   dataLoader.getPhenotype().extractCol(0));

  if (dataLoader.getPhenotype().nrow() == 0) {
    logger->fatal("There are 0 samples with valid phenotypes, quitting...");
    exit(1);
  }
  // if (phenotypeInOrder.empty()) {
  //   logger->fatal("There are 0 samples with valid phenotypes, quitting...");
  //   exit(1);
  // }

  logger->info("Analysis begins with [ %d ] samples...",
               dataLoader.getPhenotype().nrow());
  //////////////////////////////////////////////////////////////////////////////
  // prepare each model
  bool singleVariantMode = FLAG_modelSingle.size() || FLAG_modelMeta.size();
  bool groupVariantMode = (FLAG_modelBurden.size() || FLAG_modelVT.size() ||
                           FLAG_modelKernel.size());
  if (singleVariantMode && groupVariantMode) {
    logger->error("Cannot support both single variant and region based tests");
    exit(1);
  }

  ModelManager modelManager(FLAG_outPrefix);
  // set up models in qtl/binary modes
  if (dataLoader.isBinaryPhenotype()) {
    modelManager.setBinaryOutcome();
    matchPhenotypeAndVCF("missing phenotype (not case/control)", &dataLoader,
                         &ge);
  } else {
    modelManager.setQuantitativeOutcome();
  }
  // create models
  modelManager.create("single", FLAG_modelSingle);
  modelManager.create("burden", FLAG_modelBurden);
  modelManager.create("vt", FLAG_modelVT);
  modelManager.create("kernel", FLAG_modelKernel);
  modelManager.create("meta", FLAG_modelMeta);
  if (FLAG_outputRaw) {
    modelManager.create("outputRaw", "dump");
  }

  const std::vector<ModelFitter*>& model = modelManager.getModel();
  const std::vector<FileWriter*>& fOuts = modelManager.getResultFile();
  const size_t numModel = model.size();

  // TODO: optimize this by avoidding data copying
  Matrix phenotypeMatrix;
  Matrix covariate;
  toMatrix(dataLoader.getPhenotype(), &phenotypeMatrix);
  toMatrix(dataLoader.getCovariate(), &covariate);

  // determine VCF file reading pattern
  // current support:
  // * line by line ( including range selection)
  // * gene by gene
  // * range by range
  std::string rangeMode = "Single";
  if (FLAG_geneFile.size() && (FLAG_setFile.size() || FLAG_setList.size())) {
    logger->error("Cannot specify both gene file and set file.");
    exit(1);
  }

  if (!FLAG_gene.empty() && FLAG_geneFile.empty()) {
    logger->error("Please provide gene file for gene bases analysis.");
    exit(1);
  }
  OrderedMap<std::string, RangeList> geneRange;
  if (FLAG_geneFile.size()) {
    rangeMode = "Gene";
    int ret =
        loadGeneFile(FLAG_geneFile.c_str(), FLAG_gene.c_str(), &geneRange);
    if (ret < 0 || geneRange.size() == 0) {
      logger->error("Error loading gene file or gene list is empty!");
      return -1;
    } else {
      logger->info("Loaded [ %zu ] genes.", geneRange.size());
    }
  }

  if (!FLAG_set.empty() && FLAG_setFile.empty()) {
    logger->error("Please provide set file for set bases analysis.");
    exit(1);
  }
  if (FLAG_setFile.size()) {
    rangeMode = "Range";
    int ret = loadRangeFile(FLAG_setFile.c_str(), FLAG_set.c_str(), &geneRange);
    if (ret < 0 || geneRange.size() == 0) {
      logger->error("Error loading set file or set list is empty!");
      return -1;
    } else {
      logger->info("Loaded [ %zu ] set to tests.", geneRange.size());
    }
  }
  if (FLAG_setList.size()) {
    rangeMode = "Range";
    int ret = appendListToRange(FLAG_setList, &geneRange);
    if (ret < 0) {
      logger->error("Error loading set list or set list is empty!");
      return -1;
    }
  }

  DataConsolidator dc;
  dc.setSex(&dataLoader.getSex());
  dc.setFormula(&dataLoader.getFormula());
  dc.setGenotypeCounter(ge.getGenotypeCounter());

  // load kinshp if needed by family models
  if (modelManager.hasFamilyModel() ||
      (!FLAG_modelMeta.empty() && !FLAG_kinship.empty())) {
    logger->info("Family-based model specified. Loading kinship file...");

    // process auto kinship
    if (dc.setKinshipSample(dataLoader.getPhenotype().getRowName()) ||
        dc.setKinshipFile(DataConsolidator::KINSHIP_AUTO, FLAG_kinship) ||
        dc.setKinshipEigenFile(DataConsolidator::KINSHIP_AUTO,
                               FLAG_kinshipEigen) ||
        dc.loadKinship(DataConsolidator::KINSHIP_AUTO)) {
      logger->error(
          "Failed to load autosomal kinship (you may use vcf2kinship to "
          "generate one).");
      exit(1);
    }

    if (dc.setKinshipFile(DataConsolidator::KINSHIP_X, FLAG_xHemiKinship) ||
        dc.setKinshipEigenFile(DataConsolidator::KINSHIP_X,
                               FLAG_xHemiKinshipEigen) ||
        dc.loadKinship(DataConsolidator::KINSHIP_X)) {
      logger->warn(
          "Autosomal kinship loaded, but no hemizygote region kinship "
          "provided, some sex chromosome tests will be skipped.");
      // keep the program going
    }
  } else if (!FLAG_kinship.empty() && FLAG_modelMeta.empty()) {
    logger->info(
        "Family-based model not specified. Options related to kinship will be "
        "ignored here.");
  }

  // set imputation method
  if (FLAG_impute.empty()) {
    logger->info("Impute missing genotype to mean (by default)");
    dc.setStrategy(DataConsolidator::IMPUTE_MEAN);
  } else if (FLAG_impute == "mean") {
    logger->info("Impute missing genotype to mean");
    dc.setStrategy(DataConsolidator::IMPUTE_MEAN);
  } else if (FLAG_impute == "hwe") {
    logger->info("Impute missing genotype by HWE");
    dc.setStrategy(DataConsolidator::IMPUTE_HWE);
  } else if (FLAG_impute == "drop") {
    logger->info("Drop missing genotypes");
    dc.setStrategy(DataConsolidator::DROP);
  }
  dc.setPhenotypeName(dataLoader.getPhenotype().getRowName());

  // set up par region
  ParRegion parRegion(FLAG_xLabel, FLAG_xParRegion);
  dc.setParRegion(&parRegion);

  // genotype will be extracted and stored
  Matrix& genotype = dc.getOriginalGenotype();
  if (FLAG_freqUpper > 0) {
    ge.setSiteFreqMax(FLAG_freqUpper);
    logger->info("Set upper minor allele frequency limit to %g",
                 FLAG_freqUpper);
  }
  if (FLAG_freqLower > 0) {
    ge.setSiteFreqMin(FLAG_freqLower);
    logger->info("Set lower minor allele frequency limit to %g",
                 FLAG_freqLower);
  }

  // handle sex chromosome
  ge.setParRegion(&parRegion);
  ge.setSex(&dataLoader.getSex());

  // use dosage instead GT
  if (!FLAG_dosageTag.empty()) {
    ge.setDosageTag(FLAG_dosageTag);
    logger->info("Use dosage genotype from VCF flag %s.",
                 FLAG_dosageTag.c_str());
  }

  // genotype QC options
  if (FLAG_indvDepthMin > 0) {
    ge.setGDmin(FLAG_indvDepthMin);
    logger->info("Minimum GD set to %d (or marked as missing genotype).",
                 FLAG_indvDepthMin);
  }
  if (FLAG_indvDepthMax > 0) {
    ge.setGDmax(FLAG_indvDepthMax);
    logger->info("Maximum GD set to %d (or marked as missing genotype).",
                 FLAG_indvDepthMax);
  }
  if (FLAG_indvQualMin > 0) {
    ge.setGQmin(FLAG_indvQualMin);
    logger->info("Minimum GQ set to %d (or marked as missing genotype).",
                 FLAG_indvQualMin);
  }

  dc.preRegressionCheck(phenotypeMatrix, covariate);

  logger->info("Analysis started");
  Result& buf = dc.getResult();

  // we have three modes:
  // * single variant reading, single variant test
  // * range variant reading, single variant test
  // * range variant reading, group variant test
  if (rangeMode == "Single" && singleVariantMode) {  // use line by line mode
    buf.addHeader("CHROM");
    buf.addHeader("POS");
    buf.addHeader("REF");
    buf.addHeader("ALT");
    buf.addHeader("N_INFORMATIVE");

    // output headers
    for (size_t m = 0; m < model.size(); m++) {
      model[m]->writeHeader(fOuts[m], buf);
    }

    int variantProcessed = 0;
    while (true) {
      buf.clearValue();
      int ret = ge.extractSingleGenotype(&genotype, &buf);

      if (ret == GenotypeExtractor::FILE_END) {  // reach file end
        break;
      }
      if (ret == GenotypeExtractor::FAIL_FILTER) {
        continue;
      }
      if (ret != GenotypeExtractor::SUCCEED) {
        logger->error("Extract genotype failed at site: %s:%s!",
                      buf["CHROM"].c_str(), buf["POS"].c_str());
        continue;
      }
      if (genotype.cols == 0) {
        logger->warn("Extract [ %s:%s ] has 0 variants, skipping",
                     buf["CHROM"].c_str(), buf["POS"].c_str());
        continue;
      }

      ++variantProcessed;
      dc.consolidate(phenotypeMatrix, covariate, genotype);

      buf.updateValue("N_INFORMATIVE", toString(genotype.rows));

      // fit each model
      for (size_t m = 0; m != numModel; m++) {
        model[m]->reset();
        model[m]->fit(&dc);
        model[m]->writeOutput(fOuts[m], buf);
      }
    }
    logger->info("Analyzed [ %d ] variants", variantProcessed);
  } else if (rangeMode != "Single" &&
             singleVariantMode) {  // read by gene/range model, single variant
    // test
    buf.addHeader(rangeMode);
    buf.addHeader("CHROM");
    buf.addHeader("POS");
    buf.addHeader("REF");
    buf.addHeader("ALT");
    buf.addHeader("N_INFORMATIVE");

    // output headers
    for (size_t m = 0; m < numModel; m++) {
      model[m]->writeHeader(fOuts[m], buf);
    }
    std::string geneName;
    RangeList rangeList;
    int variantProcessed = 0;
    for (size_t i = 0; i < geneRange.size(); ++i) {
      geneRange.at(i, &geneName, &rangeList);
      ge.setRange(rangeList);

      while (true) {
        buf.clearValue();
        int ret = ge.extractSingleGenotype(&genotype, &buf);
        if (ret == GenotypeExtractor::FILE_END) {  // reach end of this region
          break;
        }
        if (ret == GenotypeExtractor::FAIL_FILTER) {
          continue;
        }
        if (ret != GenotypeExtractor::SUCCEED) {
          logger->error("Extract genotype failed for gene %s!",
                        geneName.c_str());
          continue;
        }
        if (genotype.cols == 0) {
          logger->warn("Gene %s has 0 variants, skipping", geneName.c_str());
          continue;
        }

        ++variantProcessed;
        dc.consolidate(phenotypeMatrix, covariate, genotype);

        buf.updateValue(rangeMode, geneName);
        buf.updateValue("N_INFORMATIVE", genotype.rows);

        // #pragma omp parallel for
        for (size_t m = 0; m != numModel; m++) {
          model[m]->reset();
          model[m]->fit(&dc);
          model[m]->writeOutput(fOuts[m], buf);
        }
      }
    }
    logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions",
                 variantProcessed, (int)geneRange.size());
  } else if (rangeMode != "Single" &&
             groupVariantMode) {  // read by gene/range mode, group variant
                                  // test
    buf.addHeader(rangeMode);
    buf.addHeader("RANGE");
    buf.addHeader("N_INFORMATIVE");
    buf.addHeader("NumVar");
    buf.addHeader("NumPolyVar");

    // output headers
    for (size_t m = 0; m < numModel; m++) {
      model[m]->writeHeader(fOuts[m], buf);
    }
    std::string geneName;
    RangeList rangeList;
    int variantProcessed = 0;
    ge.enableAutoMerge();
    for (size_t i = 0; i < geneRange.size(); ++i) {
      geneRange.at(i, &geneName, &rangeList);
      ge.setRange(rangeList);

      buf.clearValue();
      int ret = ge.extractMultipleGenotype(&genotype);

      if (ret != GenotypeExtractor::SUCCEED) {
        logger->error("Extract genotype failed for gene %s!", geneName.c_str());
        continue;
      }
      if (genotype.cols == 0) {
        logger->info("Gene %s has 0 variants, skipping", geneName.c_str());
        continue;
      }

      variantProcessed += genotype.cols;  // genotype is people by marker
      dc.consolidate(phenotypeMatrix, covariate, genotype);

      buf.updateValue(rangeMode, geneName);
      buf.updateValue("RANGE", rangeList.toString());
      buf.updateValue("N_INFORMATIVE", genotype.rows);
      buf.updateValue("NumVar", genotype.cols);
      buf.updateValue("NumPolyVar",
                      dc.getFlippedToMinorPolymorphicGenotype().cols);

      // #ifdef _OPENMP
      // #pragma omp parallel for
      // #endif
      for (size_t m = 0; m != numModel; m++) {
        model[m]->reset();
        model[m]->fit(&dc);
        model[m]->writeOutput(fOuts[m], buf);
      }
    }
    logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions",
                 variantProcessed, (int)geneRange.size());
  } else {
    logger->error(
        "Unsupported reading mode and test modes! (need more parameters?)");
    exit(1);
  }

  // Resource cleaning up
  modelManager.close();
  delete g_SummaryHeader;

  time_t endTime = time(0);
  logger->info("Analysis ends at: %s", currentTime().c_str());
  int elapsedSecond = (int)(endTime - startTime);
  logger->info("Analysis took %d seconds", elapsedSecond);

  return 0;
}
Exemplo n.º 18
0
int main(int argc, char** argv) {
  PARSE_PARAMETER(argc, argv);

  if (FLAG_help) {
    PARAMETER_HELP();
    return 0;
  }

  welcome();
  PARAMETER_STATUS();
  if (FLAG_REMAIN_ARG.size() > 0) {
    fprintf(stderr, "Unparsed arguments: ");
    for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) {
      fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str());
    }
    exit(1);
  }

  if (!FLAG_outPrefix.size()) FLAG_outPrefix = "rvtest";

  if ((FLAG_inVcf.empty() ? 0 : 1) + (FLAG_inBgen.empty() ? 0 : 1) +
          (FLAG_inKgg.empty() ? 0 : 1) !=
      1) {
    fprintf(stderr,
            "Please provide one type of input file using: --inVcf, --inBgen or "
            "--inKgg\n");
    exit(1);
  }

  // check new version
  if (!FLAG_noweb) {
    VersionChecker ver;
    if (ver.retrieveRemoteVersion("http://zhanxw.com/rvtests/version") < 0) {
      fprintf(stderr,
              "Retrieve remote version failed, use '--noweb' to skip.\n");
    } else {
      ver.setLocalVersion(VERSION);
      if (ver.isRemoteVersionNewer()) {
        fprintf(stderr, "New version of rvtests is available:");
        ver.printRemoteContent();
      }
    }
  }

  // start logging
  Logger _logger((FLAG_outPrefix + ".log").c_str());
  logger = &_logger;
  logger->info("Program version: %s", VERSION);
  logger->infoToFile("Git Version: %s", GIT_VERSION);
  logger->infoToFile("Parameters BEGIN");
  PARAMETER_INSTANCE().WriteToFile(logger->getHandle());
  logger->infoToFile("Parameters END");
  logger->sync();

// set up multithreading
#ifdef _OPENMP
  if (FLAG_numThread <= 0) {
    fprintf(stderr, "Invalid number of threads [ %d ], reset to single thread",
            FLAG_numThread);
    omp_set_num_threads(1);
  } else if (FLAG_numThread > omp_get_max_threads()) {
    int maxThreads = omp_get_max_threads();
    fprintf(stderr,
            "Reduced your specified number of threads to the maximum of system "
            "limit [ %d ]",
            maxThreads);
    omp_set_num_threads(maxThreads);
  } else if (FLAG_numThread == 1) {
    // need to set to one thread, otherwise all CPUs may be used
    omp_set_num_threads(1);
  } else {
    logger->info("Set number of threads = [ %d ]", FLAG_numThread);
    omp_set_num_threads(FLAG_numThread);
  }
#endif

  // start analysis
  time_t startTime = time(0);
  logger->info("Analysis started at: %s", currentTime().c_str());

  GenotypeExtractor* ge = NULL;
  if (!FLAG_inVcf.empty()) {
    ge = new VCFGenotypeExtractor(FLAG_inVcf);
  } else if (!FLAG_inBgen.empty()) {
    ge = new BGenGenotypeExtractor(FLAG_inBgen, FLAG_inBgenSample);
  } else if (!FLAG_inKgg.empty()) {
    ge = new KGGGenotypeExtractor(FLAG_inKgg);
  } else {
    assert(false);
  }

  // set range filters here
  ge->setRangeList(FLAG_rangeList.c_str());
  ge->setRangeFile(FLAG_rangeFile.c_str());

  // set people filters here
  if (FLAG_peopleIncludeID.size() || FLAG_peopleIncludeFile.size()) {
    ge->excludeAllPeople();
    ge->includePeople(FLAG_peopleIncludeID.c_str());
    ge->includePeopleFromFile(FLAG_peopleIncludeFile.c_str());
  }
  ge->excludePeople(FLAG_peopleExcludeID.c_str());
  ge->excludePeopleFromFile(FLAG_peopleExcludeFile.c_str());

  if (!FLAG_siteFile.empty()) {
    ge->setSiteFile(FLAG_siteFile);
    logger->info("Restrict analysis based on specified site file [ %s ]",
                 FLAG_siteFile.c_str());
  }
  if (FLAG_siteDepthMin > 0) {
    ge->setSiteDepthMin(FLAG_siteDepthMin);
    logger->info("Set site depth minimum to %d", FLAG_siteDepthMin);
  }
  if (FLAG_siteDepthMax > 0) {
    ge->setSiteDepthMax(FLAG_siteDepthMax);
    logger->info("Set site depth maximum to %d", FLAG_siteDepthMax);
  }
  if (FLAG_siteMACMin > 0) {
    ge->setSiteMACMin(FLAG_siteMACMin);
    logger->info("Set site minimum MAC to %d", FLAG_siteDepthMin);
  }
  if (FLAG_annoType != "") {
    ge->setAnnoType(FLAG_annoType.c_str());
    logger->info("Set annotype type filter to %s", FLAG_annoType.c_str());
  }

  std::vector<std::string> vcfSampleNames;
  ge->getPeopleName(&vcfSampleNames);
  logger->info("Loaded [ %zu ] samples from genotype files",
               vcfSampleNames.size());

  DataLoader dataLoader;
  dataLoader.setPhenotypeImputation(FLAG_imputePheno);
  dataLoader.setCovariateImputation(FLAG_imputeCov);

  if (FLAG_multiplePheno.empty()) {
    dataLoader.loadPhenotype(FLAG_pheno, FLAG_mpheno, FLAG_phenoName);
    // // load phenotypes
    // std::map<std::string, double> phenotype;
    // if (FLAG_pheno.empty()) {
    //   logger->error("Cannot do association when phenotype is missing!");
    //   return -1;
    // }

    // // check if alternative phenotype columns are used
    // if (!FLAG_mpheno.empty() && !FLAG_phenoName.empty()) {
    //   logger->error("Please specify either --mpheno or --pheno-name");
    //   return -1;
    // }
    // if (!FLAG_mpheno.empty()) {
    //   int col = atoi(FLAG_mpheno);
    //   int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype,
    //   col);
    //   if (ret < 0) {
    //     logger->error("Loading phenotype failed!");
    //     return -1;
    //   }
    // } else if (!FLAG_phenoName.empty()) {
    //   int ret = loadPedPhenotypeByHeader(FLAG_pheno.c_str(), &phenotype,
    //                                      FLAG_phenoName.c_str());
    //   if (ret < 0) {
    //     logger->error("Loading phenotype failed!");
    //     return -1;
    //   }
    // } else {
    //   int col = 1;  // default use the first phenotype
    //   int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype,
    //   col);
    //   if (ret < 0) {
    //     logger->error("Loading phenotype failed!");
    //     return -1;
    //   }
    // }
    // logger->info("Loaded [ %zu ] sample phenotypes.", phenotype.size());

    // rearrange phenotypes
    // drop samples from phenotype or vcf
    matchPhenotypeAndVCF("missing phenotype", &dataLoader, ge);
    // // phenotype names (vcf sample names) arranged in the same order as in
    // VCF
    // std::vector<std::string> phenotypeNameInOrder;
    // std::vector<double>
    //     phenotypeInOrder;  // phenotype arranged in the same order as in VCF
    // rearrange(phenotype, vcfSampleNames, &vcfSampleToDrop,
    // &phenotypeNameInOrder,
    //           &phenotypeInOrder, FLAG_imputePheno);
    // if (vcfSampleToDrop.size()) {
    //   // exclude this sample from parsing VCF
    //   ge->excludePeople(vcfSampleToDrop);
    //   // output dropped samples
    //   for (size_t i = 0; i < vcfSampleToDrop.size(); ++i) {
    //     if (i == 0)
    //       logger->warn(
    //           "Total [ %zu ] samples are dropped from VCF file due to missing
    //           "
    //           "phenotype",
    //           vcfSampleToDrop.size());
    //     if (i >= 10) {
    //       logger->warn(
    //           "Skip outputting additional [ %d ] samples with missing "
    //           "phenotypes.",
    //           ((int)vcfSampleToDrop.size() - 10));
    //       break;
    //     }
    //     logger->warn("Drop sample [ %s ] from VCF file due to missing
    //     phenotype",
    //                  (vcfSampleToDrop)[i].c_str());
    //   }
    //   // logger->warn("Drop %zu sample from VCF file since we don't have
    //   their
    //   // phenotypes", vcfSampleToDrop.size());
    // }
    // if (phenotypeInOrder.size() != phenotype.size()) {
    //   logger->warn(
    //       "Drop [ %d ] samples from phenotype file due to missing genotypes
    //       from "
    //       "VCF files",
    //       (int)(phenotype.size() - phenotypeInOrder.size()));
    //   // We may output these samples by comparing keys of phenotype and
    //   // phenotypeNameInOrder
    // }
    dataLoader.loadCovariate(FLAG_cov, FLAG_covName);
    matchCovariateAndVCF("missing covariate", &dataLoader, ge);
    // // load covariate
    // Matrix covariate;
    // HandleMissingCov handleMissingCov = COVARIATE_DROP;
    // if (FLAG_imputeCov) {
    //   handleMissingCov = COVARIATE_IMPUTE;
    // }
    // if (FLAG_cov.empty() && !FLAG_covName.empty()) {
    //   logger->info("Use phenotype file as covariate file [ %s ]",
    //                FLAG_pheno.c_str());
    //   FLAG_cov = FLAG_pheno;
    // }
    // if (!FLAG_cov.empty()) {
    //   logger->info("Begin to read covariate file.");
    //   std::vector<std::string> columnNamesInCovariate;
    //   std::set<std::string> sampleToDropInCovariate;
    //   int ret = loadCovariate(FLAG_cov.c_str(), phenotypeNameInOrder,
    //                           FLAG_covName.c_str(), handleMissingCov,
    //                           &covariate,
    //                           &columnNamesInCovariate,
    //                           &sampleToDropInCovariate);
    //   if (ret < 0) {
    //     logger->error("Load covariate file failed !");
    //     exit(1);
    //   }

    //   // drop phenotype samples
    //   if (!sampleToDropInCovariate.empty()) {
    //     int idx = 0;
    //     int n = phenotypeNameInOrder.size();
    //     for (int i = 0; i < n; ++i) {
    //       if (sampleToDropInCovariate.count(phenotypeNameInOrder[i]) !=
    //           0) {  // need to drop
    //         continue;
    //       }
    //       phenotypeNameInOrder[idx] = phenotypeNameInOrder[i];
    //       phenotypeInOrder[idx] = phenotypeInOrder[i];
    //       idx++;
    //     }
    //     phenotypeNameInOrder.resize(idx);
    //     phenotypeInOrder.resize(idx);
    //     logger->warn(
    //         "[ %zu ] sample phenotypes are dropped due to lacking
    //         covariates.",
    //         sampleToDropInCovariate.size());
    //   }
    //   // drop vcf samples;
    //   for (std::set<std::string>::const_iterator iter =
    //            sampleToDropInCovariate.begin();
    //        iter != sampleToDropInCovariate.end(); ++iter) {
    //     ge->excludePeople(iter->c_str());
    //   }
    // }
  } else {
    dataLoader.loadMultiplePhenotype(FLAG_multiplePheno, FLAG_pheno, FLAG_cov);

    matchPhenotypeAndVCF("missing phenotype", &dataLoader, ge);
    matchCovariateAndVCF("missing covariate", &dataLoader, ge);
  }

  dataLoader.loadSex();
  if (FLAG_sex) {
    dataLoader.useSexAsCovariate();
    matchCovariateAndVCF("missing sex", &dataLoader, ge);
  }
  // // load sex
  // std::vector<int> sex;
  // if (loadSex(FLAG_pheno, phenotypeNameInOrder, &sex)) {
  //   logger->error("Cannot load sex of samples from phenotype file");
  //   exit(1);
  // }

  // if (FLAG_sex) {            // append sex in covariate
  //   std::vector<int> index;  // mark missing samples
  //   int numMissing = findMissingSex(sex, &index);
  //   logger->info("Futher exclude %d samples with missing sex", numMissing);
  //   removeByIndex(index, &sex);
  //   excludeSamplesByIndex(index, &ge, &phenotypeNameInOrder,
  //   &phenotypeInOrder,
  //                         &covariate);
  //   appendToMatrix("Sex", sex, &covariate);
  // }

  if (!FLAG_condition.empty()) {
    dataLoader.loadMarkerAsCovariate(FLAG_inVcf, FLAG_condition);
    matchCovariateAndVCF("missing in conditioned marker(s)", &dataLoader, ge);
  }
  // // load conditional markers
  // if (!FLAG_condition.empty()) {
  //   Matrix geno;
  //   std::vector<std::string> rowLabel;
  //   if (loadMarkerFromVCF(FLAG_inVcf, FLAG_condition, &rowLabel, &geno) < 0)
  //   {
  //     logger->error("Load conditional markers [ %s ] from [ %s ] failed.",
  //                   FLAG_condition.c_str(), FLAG_inVcf.c_str());
  //     exit(1);
  //   }
  //   if (appendGenotype(&covariate, phenotypeNameInOrder, geno, rowLabel) < 0)
  //   {
  //     logger->error(
  //         "Failed to combine conditional markers [ %s ] from [ %s ] failed.",
  //         FLAG_condition.c_str(), FLAG_inVcf.c_str());
  //     exit(1);
  //   }
  // }

  dataLoader.checkConstantCovariate();
  // // check if some covariates are constant for all samples
  // // e.g. user may include covariate "1" in addition to intercept
  // //      in such case, we will give a fatal error
  // for (int i = 0; i < covariate.cols; ++i) {
  //   std::set<double> s;
  //   s.clear();
  //   for (int j = 0; j < covariate.rows; ++j) {
  //     s.insert(covariate(j,i));
  //   }
  //   if (s.size() == 1) {
  //     logger->error(
  //         "Covariate [ %s ] equals [ %g ] for all samples, cannot fit "
  //         "model...\n",
  //         covariate.GetColumnLabel(i), *s.begin());
  //     exit(1);
  //   }
  // }

  g_SummaryHeader = new SummaryHeader;
  g_SummaryHeader->recordCovariate(dataLoader.getCovariate());

  // record raw phenotype
  g_SummaryHeader->recordPhenotype("Trait",
                                   dataLoader.getPhenotype().extractCol(0));
  // adjust phenotype
  // bool binaryPhenotype;
  if (FLAG_qtl) {
    // binaryPhenotype = false;
    dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL);
    logger->info("-- Force quantitative trait mode -- ");
  } else {
    if (dataLoader.detectPhenotypeType() == DataLoader::PHENOTYPE_BINARY) {
      logger->warn("-- Enabling binary phenotype mode -- ");
      dataLoader.setTraitType(DataLoader::PHENOTYPE_BINARY);

    } else {
      dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL);
    }
    // binaryPhenotype = isBinaryPhenotype(phenotypeInOrder);
    // if (binaryPhenotype) {
    //   logger->warn("-- Enabling binary phenotype mode -- ");
    //   convertBinaryPhenotype(&phenotypeInOrder);
    // }
  }

  if (FLAG_useResidualAsPhenotype) {
    dataLoader.useResidualAsPhenotype();
    g_SummaryHeader->recordEstimation(dataLoader.getEstimation());
  }
  // // use residual as phenotype
  // if (FLAG_useResidualAsPhenotype) {
  //   if (binaryPhenotype) {
  //     logger->warn(
  //         "WARNING: Skip transforming binary phenotype, although you want to
  //         "
  //         "use residual as phenotype!");
  //   } else {
  //     if (covariate.cols > 0) {
  //       LinearRegression lr;
  //       Vector pheno;
  //       Matrix covAndInt;
  //       copy(phenotypeInOrder, &pheno);
  //       copyCovariateAndIntercept(covariate.rows, covariate, &covAndInt);
  //       if (!lr.FitLinearModel(covAndInt, pheno)) {
  //         logger->error(
  //             "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the
  //             "
  //             "original phenotype");
  //       } else {
  //         const int n = lr.GetResiduals().Length();
  //         for (int i = 0; i < n; ++i) {
  //           phenotypeInOrder[i] = lr.GetResiduals()[i];
  //         }
  //         covariate.Dimension(0, 0);
  //         logger->info(
  //             "DONE: Fit model [ phenotype ~ 1 + covariates ] and model "
  //             "residuals will be used as responses.");
  //       }
  //     } else {  // no covaraites
  //       centerVector(&phenotypeInOrder);
  //       logger->info("DONE: Use residual as phenotype by centerng it");
  //     }
  //   }
  // }

  if (FLAG_inverseNormal) {
    dataLoader.inverseNormalizePhenotype();
    g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal);
  }
  // // phenotype transformation
  // if (FLAG_inverseNormal) {
  //   if (binaryPhenotype) {
  //     logger->warn(
  //         "WARNING: Skip transforming binary phenotype, although you required
  //         "
  //         "inverse normalization!");
  //   } else {
  //     logger->info("Now applying inverse normalize transformation.");
  //     inverseNormalizeLikeMerlin(&phenotypeInOrder);
  //     g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal);
  //     logger->info("DONE: inverse normalization transformation finished.");
  //   }
  // }

  g_SummaryHeader->recordPhenotype("AnalyzedTrait",
                                   dataLoader.getPhenotype().extractCol(0));

  if (dataLoader.getPhenotype().nrow() == 0) {
    logger->fatal("There are 0 samples with valid phenotypes, quitting...");
    exit(1);
  }
  // if (phenotypeInOrder.empty()) {
  //   logger->fatal("There are 0 samples with valid phenotypes, quitting...");
  //   exit(1);
  // }
  logger->info("Analysis begins with [ %d ] samples...",
               dataLoader.getPhenotype().nrow());
  //////////////////////////////////////////////////////////////////////////////
  // prepare each model
  bool singleVariantMode = FLAG_modelSingle.size() || FLAG_modelMeta.size();
  bool groupVariantMode = (FLAG_modelBurden.size() || FLAG_modelVT.size() ||
                           FLAG_modelKernel.size());
  if (singleVariantMode && groupVariantMode) {
    logger->error("Cannot support both single variant and region based tests");
    exit(1);
  }

  ModelManager modelManager(FLAG_outPrefix);
  // set up models in qtl/binary modes
  if (dataLoader.isBinaryPhenotype()) {
    modelManager.setBinaryOutcome();
    matchPhenotypeAndVCF("missing phenotype (not case/control)", &dataLoader,
                         ge);
  } else {
    modelManager.setQuantitativeOutcome();
  }
  // create models
  modelManager.create("single", FLAG_modelSingle);
  modelManager.create("burden", FLAG_modelBurden);
  modelManager.create("vt", FLAG_modelVT);
  modelManager.create("kernel", FLAG_modelKernel);
  modelManager.create("meta", FLAG_modelMeta);
  if (FLAG_outputRaw) {
    modelManager.create("outputRaw", "dump");
  }

  const std::vector<ModelFitter*>& model = modelManager.getModel();
  const std::vector<FileWriter*>& fOuts = modelManager.getResultFile();
  const size_t numModel = model.size();

  // TODO: optimize this to avoid data copying
  Matrix phenotypeMatrix;
  Matrix covariate;
  toMatrix(dataLoader.getPhenotype(), &phenotypeMatrix);
  toMatrix(dataLoader.getCovariate(), &covariate);

  // determine VCF file reading pattern
  // current support:
  // * line by line ( including range selection)
  // * gene by gene
  // * range by range
  std::string rangeMode = "Single";
  if (FLAG_geneFile.size() && (FLAG_setFile.size() || FLAG_setList.size())) {
    logger->error("Cannot specify both gene file and set file.");
    exit(1);
  }

  if (!FLAG_gene.empty() && FLAG_geneFile.empty()) {
    logger->error("Please provide gene file for gene bases analysis.");
    exit(1);
  }
  OrderedMap<std::string, RangeList> geneRange;
  if (FLAG_geneFile.size()) {
    rangeMode = "Gene";
    int ret =
        loadGeneFile(FLAG_geneFile.c_str(), FLAG_gene.c_str(), &geneRange);
    if (ret < 0 || geneRange.size() == 0) {
      logger->error("Error loading gene file or gene list is empty!");
      return -1;
    } else {
      logger->info("Loaded [ %zu ] genes.", geneRange.size());
    }
  }

  if (!FLAG_set.empty() && FLAG_setFile.empty()) {
    logger->error("Please provide set file for set bases analysis.");
    exit(1);
  }
  if (FLAG_setFile.size()) {
    rangeMode = "Range";
    int ret = loadRangeFile(FLAG_setFile.c_str(), FLAG_set.c_str(), &geneRange);
    if (ret < 0 || geneRange.size() == 0) {
      logger->error("Error loading set file or set list is empty!");
      return -1;
    } else {
      logger->info("Loaded [ %zu ] set to tests.", geneRange.size());
    }
  }
  if (FLAG_setList.size()) {
    rangeMode = "Range";
    int ret = appendListToRange(FLAG_setList, &geneRange);
    if (ret < 0) {
      logger->error("Error loading set list or set list is empty!");
      return -1;
    }
  }

  DataConsolidator dc;
  dc.setSex(&dataLoader.getSex());
  dc.setFormula(&dataLoader.getFormula());
  dc.setGenotypeCounter(ge->getGenotypeCounter());

  // load kinshp if needed by family models
  if (modelManager.hasFamilyModel() ||
      (!FLAG_modelMeta.empty() && !FLAG_kinship.empty())) {
    logger->info("Family-based model specified. Loading kinship file...");

    // process auto kinship
    if (dc.setKinshipSample(dataLoader.getPhenotype().getRowName()) ||
        dc.setKinshipFile(DataConsolidator::KINSHIP_AUTO, FLAG_kinship) ||
        dc.setKinshipEigenFile(DataConsolidator::KINSHIP_AUTO,
                               FLAG_kinshipEigen) ||
        dc.loadKinship(DataConsolidator::KINSHIP_AUTO)) {
      logger->error(
          "Failed to load autosomal kinship (you may use vcf2kinship to "
          "generate one).");
      exit(1);
    }

    if (dc.setKinshipFile(DataConsolidator::KINSHIP_X, FLAG_xHemiKinship) ||
        dc.setKinshipEigenFile(DataConsolidator::KINSHIP_X,
                               FLAG_xHemiKinshipEigen) ||
        dc.loadKinship(DataConsolidator::KINSHIP_X)) {
      logger->warn(
          "Autosomal kinship loaded, but no hemizygote region kinship "
          "provided, some sex chromosome tests will be skipped.");
      // keep the program going
    }
  } else if (!FLAG_kinship.empty() && FLAG_modelMeta.empty()) {
    logger->info(
        "Family-based model not specified. Options related to kinship will be "
        "ignored here.");
  }

  // set imputation method
  if (FLAG_impute.empty()) {
    logger->info("Impute missing genotype to mean (by default)");
    dc.setStrategy(DataConsolidator::IMPUTE_MEAN);
  } else if (FLAG_impute == "mean") {
    logger->info("Impute missing genotype to mean");
    dc.setStrategy(DataConsolidator::IMPUTE_MEAN);
  } else if (FLAG_impute == "hwe") {
    logger->info("Impute missing genotype by HWE");
    dc.setStrategy(DataConsolidator::IMPUTE_HWE);
  } else if (FLAG_impute == "drop") {
    logger->info("Drop missing genotypes");
    dc.setStrategy(DataConsolidator::DROP);
  }
  dc.setPhenotypeName(dataLoader.getPhenotype().getRowName());

  // set up par region
  ParRegion parRegion(FLAG_xLabel, FLAG_xParRegion);
  dc.setParRegion(&parRegion);

  // genotype will be extracted and stored
  if (FLAG_freqUpper > 0) {
    ge->setSiteFreqMax(FLAG_freqUpper);
    logger->info("Set upper minor allele frequency limit to %g",
                 FLAG_freqUpper);
  }
  if (FLAG_freqLower > 0) {
    ge->setSiteFreqMin(FLAG_freqLower);
    logger->info("Set lower minor allele frequency limit to %g",
                 FLAG_freqLower);
  }

  // handle sex chromosome
  ge->setParRegion(&parRegion);
  ge->setSex(&dataLoader.getSex());

  // use dosage instead GT
  if (!FLAG_dosageTag.empty()) {
    ge->setDosageTag(FLAG_dosageTag);
    logger->info("Use dosage genotype from VCF flag %s.",
                 FLAG_dosageTag.c_str());
  }

  // multi-allelic sites will be treats as ref/alt1, ref/alt2, ref/alt3..
  // instead of ref/alt1 (biallelic)
  if (FLAG_multiAllele) {
    ge->enableMultiAllelicMode();
    logger->info("Enable analysis using multiple allelic models");
  }

  // genotype QC options
  if (FLAG_indvDepthMin > 0) {
    ge->setGDmin(FLAG_indvDepthMin);
    logger->info("Minimum GD set to %d (or marked as missing genotype).",
                 FLAG_indvDepthMin);
  }
  if (FLAG_indvDepthMax > 0) {
    ge->setGDmax(FLAG_indvDepthMax);
    logger->info("Maximum GD set to %d (or marked as missing genotype).",
                 FLAG_indvDepthMax);
  }
  if (FLAG_indvQualMin > 0) {
    ge->setGQmin(FLAG_indvQualMin);
    logger->info("Minimum GQ set to %d (or marked as missing genotype).",
                 FLAG_indvQualMin);
  }

  // e.g. check colinearity and correlations between predictors
  dc.preRegressionCheck(phenotypeMatrix, covariate);

  // prepare PLINK files for BoltLMM model
  if (!FLAG_boltPlink.empty()) {
    if (dc.prepareBoltModel(FLAG_boltPlink,
                            dataLoader.getPhenotype().getRowName(),
                            dataLoader.getPhenotype())) {
      logger->error(
          "Failed to prepare inputs for BOLT-LMM association test model with "
          "this prefix [ %s ]!",
          FLAG_boltPlink.c_str());
      exit(1);
    }
  }

  logger->info("Analysis started");
  Result& buf = dc.getResult();
  Matrix& genotype = dc.getOriginalGenotype();

  // we have three modes:
  // * single variant reading, single variant test
  // * range variant reading, single variant test
  // * range variant reading, group variant test
  if (rangeMode == "Single" && singleVariantMode) {  // use line by line mode
    buf.addHeader("CHROM");
    buf.addHeader("POS");
    if (FLAG_outputID) {
      buf.addHeader("ID");
    }
    buf.addHeader("REF");
    buf.addHeader("ALT");
    buf.addHeader("N_INFORMATIVE");

    // output headers
    for (size_t m = 0; m < model.size(); m++) {
      model[m]->writeHeader(fOuts[m], buf);
    }

    int variantProcessed = 0;
    while (true) {
      buf.clearValue();
      int ret = ge->extractSingleGenotype(&genotype, &buf);

      if (ret == GenotypeExtractor::FILE_END) {  // reach file end
        break;
      }
      if (ret == GenotypeExtractor::FAIL_FILTER) {
        continue;
      }
      if (ret != GenotypeExtractor::SUCCEED) {
        logger->error("Extract genotype failed at site: %s:%s!",
                      buf["CHROM"].c_str(), buf["POS"].c_str());
        continue;
      }
      if (genotype.cols == 0) {
        logger->warn("Extract [ %s:%s ] has 0 variants, skipping",
                     buf["CHROM"].c_str(), buf["POS"].c_str());
        continue;
      }

      ++variantProcessed;
      dc.consolidate(phenotypeMatrix, covariate, genotype);

      buf.updateValue("N_INFORMATIVE", toString(genotype.rows));

      // logger->info("Test variant at site: %s:%s!",
      //               buf["CHROM"].c_str(), buf["POS"].c_str());

      // fit each model
      for (size_t m = 0; m != numModel; m++) {
        model[m]->reset();
        model[m]->fit(&dc);
        model[m]->writeOutput(fOuts[m], buf);
      }
    }
    logger->info("Analyzed [ %d ] variants", variantProcessed);
  } else if (rangeMode != "Single" &&
             singleVariantMode) {  // read by gene/range model, single variant
    // test
    buf.addHeader(rangeMode);
    buf.addHeader("CHROM");
    buf.addHeader("POS");
    if (FLAG_outputID) {
      buf.addHeader("ID");
    }
    buf.addHeader("REF");
    buf.addHeader("ALT");
    buf.addHeader("N_INFORMATIVE");

    // output headers
    for (size_t m = 0; m < numModel; m++) {
      model[m]->writeHeader(fOuts[m], buf);
    }
    std::string geneName;
    RangeList rangeList;
    int variantProcessed = 0;
    for (size_t i = 0; i < geneRange.size(); ++i) {
      geneRange.at(i, &geneName, &rangeList);
      ge->setRange(rangeList);

      while (true) {
        buf.clearValue();
        int ret = ge->extractSingleGenotype(&genotype, &buf);
        if (ret == GenotypeExtractor::FILE_END) {  // reach end of this region
          break;
        }
        if (ret == GenotypeExtractor::FAIL_FILTER) {
          continue;
        }
        if (ret != GenotypeExtractor::SUCCEED) {
          logger->error("Extract genotype failed for gene %s!",
                        geneName.c_str());
          continue;
        }
        if (genotype.cols == 0) {
          logger->warn("Gene %s has 0 variants, skipping", geneName.c_str());
          continue;
        }

        ++variantProcessed;
        dc.consolidate(phenotypeMatrix, covariate, genotype);

        buf.updateValue(rangeMode, geneName);
        buf.updateValue("N_INFORMATIVE", genotype.rows);

        // #pragma omp parallel for
        for (size_t m = 0; m != numModel; m++) {
          model[m]->reset();
          model[m]->fit(&dc);
          model[m]->writeOutput(fOuts[m], buf);
        }
      }
    }
    logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions",
                 variantProcessed, (int)geneRange.size());
  } else if (rangeMode != "Single" &&
             groupVariantMode) {  // read by gene/range mode, group variant
                                  // test
    buf.addHeader(rangeMode);
    buf.addHeader("RANGE");
    buf.addHeader("N_INFORMATIVE");
    buf.addHeader("NumVar");
    buf.addHeader("NumPolyVar");

    // output headers
    for (size_t m = 0; m < numModel; m++) {
      model[m]->writeHeader(fOuts[m], buf);
    }
    std::string geneName;
    RangeList rangeList;
    int variantProcessed = 0;
    ge->enableAutoMerge();
    for (size_t i = 0; i < geneRange.size(); ++i) {
      geneRange.at(i, &geneName, &rangeList);
      ge->setRange(rangeList);
      buf.clearValue();
      int ret = ge->extractMultipleGenotype(&genotype);

      if (ret != GenotypeExtractor::SUCCEED) {
        logger->error("Extract genotype failed for gene %s!", geneName.c_str());
        continue;
      }
      if (genotype.cols == 0) {
        logger->info("Gene %s has 0 variants, skipping", geneName.c_str());
        continue;
      }

      variantProcessed += genotype.cols;  // genotype is people by marker
      dc.consolidate(phenotypeMatrix, covariate, genotype);

      buf.updateValue(rangeMode, geneName);
      buf.updateValue("RANGE", rangeList.toString());
      buf.updateValue("N_INFORMATIVE", genotype.rows);
      buf.updateValue("NumVar", genotype.cols);
      buf.updateValue("NumPolyVar",
                      dc.getFlippedToMinorPolymorphicGenotype().cols);

      // #ifdef _OPENMP
      // #pragma omp parallel for
      // #endif
      for (size_t m = 0; m != numModel; m++) {
        model[m]->reset();
        model[m]->fit(&dc);
        model[m]->writeOutput(fOuts[m], buf);
      }
    }
    logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions",
                 variantProcessed, (int)geneRange.size());
  } else {
    logger->error(
        "Unsupported reading mode and test modes! (need more parameters?)");
    exit(1);
  }

  // Resource cleaning up
  modelManager.close();
  delete g_SummaryHeader;

  time_t endTime = time(0);
  logger->info("Analysis ends at: %s", currentTime().c_str());
  int elapsedSecond = (int)(endTime - startTime);
  logger->info("Analysis took %d seconds", elapsedSecond);

  fputs("RVTESTS finished successfully\n", stdout);
  return 0;
}