// Builds the details tooltip for the memory event at the given row index:
// a display name (net allocation vs. net release), byte totals, counts,
// the kind of memory event, and the source location of its event type.
QVariantMap MemoryUsageModel::details(int index) const
{
    QVariantMap details;
    const MemoryAllocationItem *item = &m_data[index];

    // The net effect decides the title: did this event allocate or free more?
    const bool netAllocation = item->allocated >= -item->deallocated;
    details.insert(QLatin1String("displayName"),
                   netAllocation ? tr("Memory Allocated") : tr("Memory Freed"));

    details.insert(tr("Total"), tr("%1 bytes").arg(item->size));

    if (item->allocations > 0) {
        details.insert(tr("Allocated"), tr("%1 bytes").arg(item->allocated));
        details.insert(tr("Allocations"), QString::number(item->allocations));
    }

    if (item->deallocations > 0) {
        // deallocated is stored negative; negate for display.
        details.insert(tr("Deallocated"), tr("%1 bytes").arg(-item->deallocated));
        details.insert(tr("Deallocations"), QString::number(item->deallocations));
    }

    QString memoryTypeName;
    switch (selectionId(index)) {
    case HeapPage:
        memoryTypeName = tr("Heap Allocation");
        break;
    case LargeItem:
        memoryTypeName = tr("Large Item Allocation");
        break;
    case SmallItem:
        memoryTypeName = tr("Heap Usage");
        break;
    default:
        Q_UNREACHABLE();
    }
    details.insert(tr("Type"), memoryTypeName);

    details.insert(tr("Location"),
                   modelManager()->qmlModel()->eventTypes().at(item->typeId).displayName());
    return details;
}
// Returns true if this model accepts events of the given type index.
// Negative type indexes never belong to any model.
bool QmlProfilerTimelineModel::handlesTypeId(int typeIndex) const
{
    return typeIndex >= 0
            && accepted(modelManager()->qmlModel()->getEventTypes().at(typeIndex));
}
void QmlProfilerRangeModel::finalize() { if (!m_stack.isEmpty()) { qWarning() << "End times for some events are missing."; const qint64 endTime = modelManager()->traceEnd(); do { int index = m_stack.pop(); insertEnd(index, endTime - startTime(index)); } while (!m_stack.isEmpty()); } // compute range nesting computeNesting(); // compute nestingLevel - nonexpanded computeNestingContracted(); // compute nestingLevel - expanded computeExpandedLevels(); if (supportsBindingLoops()) findBindingLoops(); QmlProfilerTimelineModel::finalize(); }
// Builds the details tooltip for the debug message at the given row index.
QVariantMap DebugMessagesModel::details(int index) const
{
    const auto &item = m_data[index];
    const QmlEventType &type = modelManager()->qmlModel()->eventTypes()[item.typeId];

    QVariantMap details;
    details.insert(QLatin1String("displayName"), messageType(type.detailType()));
    details.insert(tr("Timestamp"), QmlProfilerDataModel::formatTime(startTime(index)));
    details.insert(tr("Message"), item.text);
    details.insert(tr("Location"), type.displayName());
    return details;
}
void PixmapCacheModel::finalize() { if (m_lastCacheSizeEvent != -1) { insertEnd(m_lastCacheSizeEvent, modelManager()->traceTime()->endTime() - startTime(m_lastCacheSizeEvent)); } resizeUnfinishedLoads(); computeMaxCacheSize(); flattenLoads(); computeNesting(); }
void QmlProfilerTraceFile::loadQzt(QIODevice *device) { QDataStream stream(device); stream.setVersion(QDataStream::Qt_5_5); QByteArray magic; stream >> magic; if (magic != QByteArray("QMLPROFILER")) { fail(tr("Invalid magic: %1").arg(QLatin1String(magic))); return; } qint32 dataStreamVersion; stream >> dataStreamVersion; if (dataStreamVersion > QDataStream::Qt_DefaultCompiledVersion) { fail(tr("Unknown data stream version: %1").arg(dataStreamVersion)); return; } stream.setVersion(dataStreamVersion); qint64 traceStart, traceEnd; stream >> traceStart >> traceEnd; setTraceStart(traceStart); setTraceEnd(traceEnd); QBuffer buffer; QDataStream bufferStream(&buffer); bufferStream.setVersion(dataStreamVersion); QByteArray data; setDeviceProgress(device); QmlProfilerModelManager *manager = modelManager(); if (!isCanceled()) { stream >> data; buffer.setData(qUncompress(data)); buffer.open(QIODevice::ReadOnly); quint32 numEventTypes; bufferStream >> numEventTypes; if (numEventTypes > quint32(std::numeric_limits<int>::max())) { fail(tr("Excessive number of event types: %1").arg(numEventTypes)); return; } for (int typeId = 0; typeId < static_cast<int>(numEventTypes); ++typeId) { QmlEventType type; bufferStream >> type; manager->setEventType(typeId, std::move(type)); } buffer.close(); setDeviceProgress(device); }
// Builds the details tooltip for the range at the given row index.
QVariantMap QmlProfilerRangeModel::details(int index) const
{
    const int typeId = selectionId(index);
    const QmlEventType &type = modelManager()->eventType(typeId);

    QVariantMap details;
    details.insert(QStringLiteral("displayName"),
                   tr(QmlProfilerModelManager::featureName(mainFeature())));
    details.insert(tr("Duration"), Timeline::formatTime(duration(index)));
    details.insert(tr("Details"), type.data());
    details.insert(tr("Location"), type.displayName());
    return details;
}
// Returns the source location (file/line/column) of the event type behind
// the range at the given row index.
QVariantMap QmlProfilerRangeModel::location(int index) const
{
    const int typeId = selectionId(index);
    const QmlDebug::QmlEventLocation &eventLocation =
            modelManager()->qmlModel()->getEventTypes().at(typeId).location;

    QVariantMap location;
    location.insert(QStringLiteral("file"), eventLocation.filename);
    location.insert(QStringLiteral("line"), eventLocation.line);
    location.insert(QStringLiteral("column"), eventLocation.column);
    return location;
}
void PixmapCacheModel::resizeUnfinishedLoads() { // all the unfinished "load start" events continue till the end of the trace for (auto pixmap = m_pixmaps.begin(), pixmapsEnd = m_pixmaps.end(); pixmap != pixmapsEnd; ++pixmap) { for (auto size = pixmap->sizes.begin(), sizesEnd = pixmap->sizes.end(); size != sizesEnd; ++size) { if (size->loadState == Loading) { insertEnd(size->started, modelManager()->traceTime()->endTime() - startTime(size->started)); size->loadState = Error; } } } }
// Builds the details tooltip for the range at the given row index.
QVariantMap QmlProfilerRangeModel::details(int index) const
{
    const int typeId = selectionId(index);
    const QmlProfilerDataModel::QmlEventTypeData &type =
            modelManager()->qmlModel()->getEventTypes().at(typeId);

    QVariantMap details;
    details.insert(QStringLiteral("displayName"),
                   tr(QmlProfilerModelManager::featureName(mainFeature())));
    details.insert(tr("Duration"), QmlProfilerBaseModel::formatTime(duration(index)));
    details.insert(tr("Details"), type.data);
    details.insert(tr("Location"), type.displayName);
    return details;
}
int QmlProfilerRangeModel::selectionIdForLocation(const QString &filename, int line, int column) const { // if this is called from v8 view, we don't have the column number, it will be -1 const QVector<QmlProfilerDataModel::QmlEventTypeData> &types = modelManager()->qmlModel()->getEventTypes(); for (int i = 1; i < expandedRowCount(); ++i) { int typeId = m_expandedRowTypes[i]; const QmlProfilerDataModel::QmlEventTypeData &eventData = types[typeId]; if (eventData.location.filename == filename && eventData.location.line == line && (column == -1 || eventData.location.column == column)) return typeId; } return -1; }
// Returns one label entry (display name, description, type id) per expanded row.
QVariantList QmlProfilerRangeModel::labels() const
{
    QVariantList labels;
    const QVector<QmlProfilerDataModel::QmlEventTypeData> &types =
            modelManager()->qmlModel()->getEventTypes();
    // Row 0 carries selection id -1 and has no label; skip it.
    for (int row = 1; row < expandedRowCount(); ++row) {
        const int typeId = m_expandedRowTypes[row];
        QVariantMap entry;
        entry.insert(QLatin1String("displayName"), QVariant(types[typeId].displayName));
        entry.insert(QLatin1String("description"), QVariant(types[typeId].data));
        entry.insert(QLatin1String("id"), QVariant(typeId));
        labels.append(entry);
    }
    return labels;
}
// Returns one label entry (display name, description, type id) per expanded row.
QVariantList QmlProfilerRangeModel::labels() const
{
    QVariantList labels;
    const QmlProfilerModelManager *manager = modelManager();
    // Row 0 carries selection id -1 and has no label; skip it.
    for (int row = 1; row < expandedRowCount(); ++row) {
        const int typeId = m_expandedRowTypes[row];
        const QmlEventType &type = manager->eventType(typeId);
        QVariantMap entry;
        entry.insert(QLatin1String("displayName"), type.displayName());
        entry.insert(QLatin1String("description"), type.data());
        entry.insert(QLatin1String("id"), typeId);
        labels.append(entry);
    }
    return labels;
}
// Returns the source location (file/line/column) of the event type attached to
// the event at the given row index. Returns an empty map if the event has no
// type id or the id is not (yet) known to the model manager.
QVariantMap QmlProfilerTimelineModel::locationFromTypeId(int index) const
{
    QVariantMap result;
    const int id = typeId(index);
    if (id < 0)
        return result; // no type attached to this event

    // Bind by const reference instead of copy-initializing the container
    // (avoids a copy / COW detach; clang-tidy performance-unnecessary-copy-initialization).
    const auto &types = modelManager()->qmlModel()->eventTypes();
    if (id >= types.length())
        return result; // type id out of range: not known (yet)

    const QmlEventLocation location = types.at(id).location();
    result.insert(QStringLiteral("file"), location.filename());
    result.insert(QStringLiteral("line"), location.line());
    result.insert(QStringLiteral("column"), location.column());
    return result;
}
/* Ultimately there is no way to know which cache entry a given event refers to as long as we only
 * receive the pixmap URL from the application. Multiple copies of different sizes may be cached
 * for each URL. However, we can apply some heuristics to make the result somewhat plausible by
 * using the following assumptions:
 *
 * - PixmapSizeKnown will happen at most once for every cache entry.
 * - PixmapSizeKnown cannot happen for entries with PixmapLoadingError and vice versa.
 * - PixmapCacheCountChanged can happen for entries with PixmapLoadingError but doesn't have to.
 * - Decreasing PixmapCacheCountChanged events can only happen for entries that have seen an
 *   increasing PixmapCacheCountChanged (but that may have happened before the trace).
 * - PixmapCacheCountChanged can happen before or after PixmapSizeKnown.
 * - For every PixmapLoadingFinished or PixmapLoadingError there is exactly one
 *   PixmapLoadingStarted event, but it may be before the trace.
 * - For every PixmapLoadingStarted there is exactly one PixmapLoadingFinished or
 *   PixmapLoadingError, but it may be after the trace.
 * - Decreasing PixmapCacheCountChanged events in the presence of corrupt cache entries are more
 *   likely to clear those entries than other, correctly loaded ones.
 * - Increasing PixmapCacheCountChanged events are more likely to refer to correctly loaded entries
 *   than to ones with PixmapLoadingError.
 * - PixmapLoadingFinished and PixmapLoadingError are more likely to refer to cache entries that
 *   have seen a PixmapLoadingStarted than to ones that haven't.
 *
 * For each URL we keep an ordered list of pixmaps possibly being loaded and assign new events to
 * the first entry that "fits". If multiple sizes of the same pixmap are being loaded concurrently
 * we generally assume that the PixmapLoadingFinished and PixmapLoadingError events occur in the
 * order we learn about the existence of these sizes, subject to the above constraints. This is not
 * necessarily the order the pixmaps are really loaded but it's the best we can do with the given
 * information. If they're loaded sequentially the representation is correct. */
void PixmapCacheModel::loadEvent(const QmlEvent &event, const QmlEventType &type)
{
    PixmapCacheItem newEvent;
    const PixmapEventType pixmapType = static_cast<PixmapEventType>(type.detailType());
    newEvent.pixmapEventType = pixmapType;
    qint64 pixmapStartTime = event.timestamp();

    // Find the Pixmap entry matching the event's URL, searching backwards so
    // that the most recently added entry for that URL wins.
    newEvent.urlIndex = -1;
    for (auto i = m_pixmaps.cend(), begin = m_pixmaps.cbegin(); i != begin;) {
        if ((--i)->url == type.location().filename()) {
            newEvent.urlIndex = i - m_pixmaps.cbegin();
            break;
        }
    }

    // First event for this URL: create a fresh Pixmap entry.
    newEvent.sizeIndex = -1;
    if (newEvent.urlIndex == -1) {
        newEvent.urlIndex = m_pixmaps.count();
        m_pixmaps << Pixmap(type.location().filename());
    }

    Pixmap &pixmap = m_pixmaps[newEvent.urlIndex];
    switch (pixmapType) {
    case PixmapSizeKnown: {// pixmap size
        // Look for pixmaps for which we don't know the size, yet and which have actually been
        // loaded.
        for (auto i = pixmap.sizes.begin(), end = pixmap.sizes.end(); i != end; ++i) {
            if (i->size.isValid() || i->cacheState == Uncacheable || i->cacheState == Corrupt)
                continue;
            // We can't have cached it before we knew the size
            Q_ASSERT(i->cacheState != Cached);
            i->size.setWidth(event.number<qint32>(0));
            i->size.setHeight(event.number<qint32>(1));
            newEvent.sizeIndex = i - pixmap.sizes.begin();
            break;
        }

        // No candidate found: record a new size entry for this pixmap.
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState(event.number<qint32>(0), event.number<qint32>(1));
        }

        // If a cache event arrived before the size was known, the cache size
        // row can only be updated now that width and height are available.
        PixmapState &state = pixmap.sizes[newEvent.sizeIndex];
        if (state.cacheState == ToBeCached) {
            m_lastCacheSizeEvent = updateCacheCount(m_lastCacheSizeEvent, pixmapStartTime,
                                                    state.size.width() * state.size.height(),
                                                    newEvent, event.typeIndex());
            state.cacheState = Cached;
        }
        break;
    }
    case PixmapCacheCountChanged: {// Cache Size Changed Event
        // A drop in the cumulated count means some entry was uncached.
        bool uncache = m_cumulatedCount > event.number<qint32>(2);
        m_cumulatedCount = event.number<qint32>(2);
        qint64 pixSize = 0;

        // First try to find a preferred pixmap, which either is Corrupt and will be uncached
        // or is uncached and will be cached.
        for (auto i = pixmap.sizes.begin(), end = pixmap.sizes.end(); i != end; ++i) {
            if (uncache && i->cacheState == Corrupt) {
                newEvent.sizeIndex = i - pixmap.sizes.begin();
                i->cacheState = Uncacheable;
                break;
            } else if (!uncache && i->cacheState == Uncached) {
                newEvent.sizeIndex = i - pixmap.sizes.begin();
                if (i->size.isValid()) {
                    pixSize = i->size.width() * i->size.height();
                    i->cacheState = Cached;
                } else {
                    i->cacheState = ToBeCached;
                }
                break;
            }
        }

        // If none found, check for cached or ToBeCached pixmaps that shall be uncached or
        // Error pixmaps that become corrupt cache entries. We also accept Initial to be
        // uncached as we may have missed the matching PixmapCacheCountChanged that cached it.
        if (newEvent.sizeIndex == -1) {
            for (auto i = pixmap.sizes.begin(), end = pixmap.sizes.end(); i != end; ++i) {
                if (uncache && (i->cacheState == Cached || i->cacheState == ToBeCached)) {
                    newEvent.sizeIndex = i - pixmap.sizes.begin();
                    if (i->size.isValid())
                        pixSize = -i->size.width() * i->size.height();
                    i->cacheState = Uncached;
                    break;
                } else if (!uncache && i->cacheState == Uncacheable) {
                    // A pixmap can repeatedly be cached, become corrupt, and then be uncached
                    // again.
                    newEvent.sizeIndex = i - pixmap.sizes.begin();
                    i->cacheState = Corrupt;
                    break;
                }
            }
        }

        // If that doesn't work, create a new entry.
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState(uncache ? Uncached : ToBeCached);
            // now the size is 0. Thus, there is no point in updating the size row.
        } else if (pixSize != 0) {
            m_lastCacheSizeEvent = updateCacheCount(m_lastCacheSizeEvent, pixmapStartTime,
                                                    pixSize, newEvent, event.typeIndex());
        }
        break;
    }
    case PixmapLoadingStarted: { // Load
        // Look for a pixmap that hasn't been started, yet. There may have been a refcount
        // event, which we ignore.
        for (auto i = pixmap.sizes.cbegin(), end = pixmap.sizes.cend(); i != end; ++i) {
            if (i->loadState == Initial) {
                newEvent.sizeIndex = i - pixmap.sizes.cbegin();
                break;
            }
        }
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState();
        }

        PixmapState &state = pixmap.sizes[newEvent.sizeIndex];
        state.loadState = Loading;
        newEvent.typeId = event.typeIndex();
        state.started = insertStart(pixmapStartTime, newEvent.urlIndex + 1);
        m_data.insert(state.started, newEvent);
        break;
    }
    case PixmapLoadingFinished:
    case PixmapLoadingError: {
        // First try to find one that has already started
        for (auto i = pixmap.sizes.cbegin(), end = pixmap.sizes.cend(); i != end; ++i) {
            if (i->loadState != Loading)
                continue;
            // Pixmaps with known size cannot be errors and vice versa
            if (pixmapType == PixmapLoadingError && i->size.isValid())
                continue;
            newEvent.sizeIndex = i - pixmap.sizes.cbegin();
            break;
        }

        // If none was found use any other compatible one
        if (newEvent.sizeIndex == -1) {
            for (auto i = pixmap.sizes.cbegin(), end = pixmap.sizes.cend(); i != end; ++i) {
                if (i->loadState != Initial)
                    continue;
                // Pixmaps with known size cannot be errors and vice versa
                if (pixmapType == PixmapLoadingError && i->size.isValid())
                    continue;
                newEvent.sizeIndex = i - pixmap.sizes.cbegin();
                break;
            }
        }

        // If again none was found, create one.
        if (newEvent.sizeIndex == -1) {
            newEvent.sizeIndex = pixmap.sizes.length();
            pixmap.sizes << PixmapState();
        }

        PixmapState &state = pixmap.sizes[newEvent.sizeIndex];
        // If the pixmap loading wasn't started, start it at traceStartTime()
        if (state.loadState == Initial) {
            newEvent.pixmapEventType = PixmapLoadingStarted;
            newEvent.typeId = event.typeIndex();
            qint64 traceStart = modelManager()->traceTime()->startTime();
            state.started = insert(traceStart, pixmapStartTime - traceStart,
                                   newEvent.urlIndex + 1);
            m_data.insert(state.started, newEvent);

            // All other indices are wrong now as we've prepended. Fix them ...
            if (m_lastCacheSizeEvent >= state.started)
                ++m_lastCacheSizeEvent;

            for (int pixmapIndex = 0; pixmapIndex < m_pixmaps.count(); ++pixmapIndex) {
                Pixmap &brokenPixmap = m_pixmaps[pixmapIndex];
                for (int sizeIndex = 0; sizeIndex < brokenPixmap.sizes.count(); ++sizeIndex) {
                    PixmapState &brokenSize = brokenPixmap.sizes[sizeIndex];
                    if ((pixmapIndex != newEvent.urlIndex || sizeIndex != newEvent.sizeIndex)
                            && brokenSize.started >= state.started) {
                        ++brokenSize.started;
                    }
                }
            }
        } else {
            insertEnd(state.started, pixmapStartTime - startTime(state.started));
        }

        if (pixmapType == PixmapLoadingError) {
            state.loadState = Error;
            switch (state.cacheState) {
            case Uncached:
                state.cacheState = Uncacheable;
                break;
            case ToBeCached:
                state.cacheState = Corrupt;
                break;
            default:
                // Cached cannot happen as size would have to be known and Corrupt or
                // Uncacheable cannot happen as we only accept one finish or error event per
                // pixmap.
                Q_UNREACHABLE();
            }
        } else {
            state.loadState = Finished;
        }
        break;
    }
    default:
        break;
    }
}
// Rebuilds the merged barbarian model: collects the body plus every enabled
// armor/weapon part, merges them into one dynamic mesh, re-creates the entity,
// and re-attaches the camera.
// NOTE(review): the upper bound of 11 presumably reflects body + 9 optional
// parts + 1 weapon; the info[] array of 32 leaves headroom — confirm.
void MergedModelFactory_cl::MergeModel()
{
    VASSERT(m_iModelsToMerge > 1 && m_iModelsToMerge < 11);

    DeleteModels();

    // Always use the body
    MeshMergeInfo_t info[32]; // maximum count
    info[0].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Body.model", true);

    // i indexes the next free slot; each enabled part appends one mesh.
    int i = 1;

    // Armor
    if (m_bArmArmor)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Arm.model", true);
    if (m_bShoulderArmor)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Shoulder.model", true);
    if (m_bLegsArmor)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Legs.model", true);
    if (m_bKneeArmor)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Knee.model", true);
    if (m_bAccessoire)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Accessoire.model", true);
    if (m_bBelt)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Belt.model", true);
    if (m_bCloth)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Cloth.model", true);
    if (m_bBeard)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Beard.model", true);

    // Weapon (exactly one of axe or sword is always included)
    if (m_bAxe)
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Axe.model", true);
    else
        info[i++].m_pMesh = Vision::Game.LoadDynamicMesh("Barbarian_Sword.model", true);

    // Setup model
    m_pMergedModelEntity = (KeyControlledTransitionBarbarian_cl *)
        Vision::Game.CreateEntity("KeyControlledTransitionBarbarian_cl", m_vPos,
                                  "Barbarian_Body.model");

    if (m_iModelsToMerge > 1)
    {
        // Merge model
        VMeshManager &modelManager(VDynamicMesh::GetResourceManager());
        VDynamicMesh *pMergedModel =
            modelManager.MergeDynamicMeshes("MergedModel", info, m_iModelsToMerge);

        // Temporarily detach the transition state machine while swapping the
        // mesh, then restore it afterwards.
        VTransitionStateMachine* pTransitionStateMachine =
            m_pMergedModelEntity->Components().GetComponentOfType<VTransitionStateMachine>();
        if(pTransitionStateMachine != NULL)
            m_pMergedModelEntity->RemoveComponent(pTransitionStateMachine);

        m_pMergedModelEntity->SetMesh(pMergedModel);

        // Load sequence set
        VisAnimSequenceSet_cl *pSet =
            Vision::Animations.GetSequenceSetManager()->LoadAnimSequenceSet("Barbarian_Body.anim");
        m_pMergedModelEntity->GetMesh()->GetSequenceSetCollection()->Add(pSet);

        if(pTransitionStateMachine != NULL)
            m_pMergedModelEntity->AddComponent(pTransitionStateMachine);
    }

    // Init entity
    m_pMergedModelEntity->InitFunction();
    m_pMergedModelEntity->SetPosition(m_vPos);
    m_pMergedModelEntity->SetOrientation(m_vOri);

    // Re-attach the camera to the freshly created entity.
    m_pCameraEntity->AttachToParent(m_pMergedModelEntity);
    m_pPlayerCamera->ResetOldPosition();
    m_pPlayerCamera->Follow = false;
    m_pPlayerCamera->Zoom = true;
}
int main(int argc, char** argv) { //////////////////////////////////////////////// BEGIN_PARAMETER_LIST(pl) ADD_PARAMETER_GROUP(pl, "Basic Input/Output") ADD_STRING_PARAMETER(pl, inVcf, "--inVcf", "Input VCF File") ADD_STRING_PARAMETER(pl, outPrefix, "--out", "Output prefix") ADD_BOOL_PARAMETER(pl, outputRaw, "--outputRaw", "Output genotypes, phenotype, covariates(if any) and " "collapsed genotype to tabular files") ADD_PARAMETER_GROUP(pl, "Specify Covariate") ADD_STRING_PARAMETER(pl, cov, "--covar", "Specify covariate file") ADD_STRING_PARAMETER( pl, covName, "--covar-name", "Specify the column name in covariate file to be included in analysis") ADD_BOOL_PARAMETER(pl, sex, "--sex", "Include sex (5th column in the PED file) as a covariate") ADD_PARAMETER_GROUP(pl, "Specify Phenotype") ADD_STRING_PARAMETER(pl, pheno, "--pheno", "Specify phenotype file") ADD_BOOL_PARAMETER(pl, inverseNormal, "--inverseNormal", "Transform phenotype like normal distribution") ADD_BOOL_PARAMETER( pl, useResidualAsPhenotype, "--useResidualAsPhenotype", "Fit covariate ~ phenotype, use residual to replace phenotype") ADD_STRING_PARAMETER(pl, mpheno, "--mpheno", "Specify which phenotype column to read (default: 1)") ADD_STRING_PARAMETER(pl, phenoName, "--pheno-name", "Specify which phenotype column to read by header") ADD_BOOL_PARAMETER(pl, qtl, "--qtl", "Treat phenotype as quantitative trait") ADD_STRING_PARAMETER( pl, multiplePheno, "--multiplePheno", "Specify aa template file for analyses of more than one phenotype") ADD_PARAMETER_GROUP(pl, "Specify Genotype") ADD_STRING_PARAMETER(pl, dosageTag, "--dosage", "Specify which dosage tag to use. (e.g. EC or DS)") ADD_PARAMETER_GROUP(pl, "Chromosome X Options") ADD_STRING_PARAMETER(pl, xLabel, "--xLabel", "Specify X chromosome label (default: 23|X)") ADD_STRING_PARAMETER(pl, xParRegion, "--xParRegion", "Specify PAR region (default: hg19), can be build " "number e.g. hg38, b37; or specify region, e.g. 
" "'60001-2699520,154931044-155260560'") ADD_PARAMETER_GROUP(pl, "People Filter") ADD_STRING_PARAMETER(pl, peopleIncludeID, "--peopleIncludeID", "List IDs of people that will be included in study") ADD_STRING_PARAMETER( pl, peopleIncludeFile, "--peopleIncludeFile", "From given file, set IDs of people that will be included in study") ADD_STRING_PARAMETER(pl, peopleExcludeID, "--peopleExcludeID", "List IDs of people that will be included in study") ADD_STRING_PARAMETER( pl, peopleExcludeFile, "--peopleExcludeFile", "From given file, set IDs of people that will be included in study") ADD_PARAMETER_GROUP(pl, "Site Filter") ADD_STRING_PARAMETER( pl, rangeList, "--rangeList", "Specify some ranges to use, please use chr:begin-end format.") ADD_STRING_PARAMETER( pl, rangeFile, "--rangeFile", "Specify the file containing ranges, please use chr:begin-end format.") ADD_STRING_PARAMETER(pl, siteFile, "--siteFile", "Specify the file containing sites to include, please " "use \"chr pos\" format.") ADD_INT_PARAMETER( pl, siteDepthMin, "--siteDepthMin", "Specify minimum depth(inclusive) to be included in analysis") ADD_INT_PARAMETER( pl, siteDepthMax, "--siteDepthMax", "Specify maximum depth(inclusive) to be included in analysis") ADD_INT_PARAMETER(pl, siteMACMin, "--siteMACMin", "Specify minimum Minor Allele Count(inclusive) to be " "included in analysis") ADD_STRING_PARAMETER(pl, annoType, "--annoType", "Specify annotation type that is followed by ANNO= in " "the VCF INFO field, regular expression is allowed ") ADD_PARAMETER_GROUP(pl, "Genotype Filter") ADD_INT_PARAMETER( pl, indvDepthMin, "--indvDepthMin", "Specify minimum depth(inclusive) of a sample to be included in analysis") ADD_INT_PARAMETER( pl, indvDepthMax, "--indvDepthMax", "Specify maximum depth(inclusive) of a sample to be included in analysis") ADD_INT_PARAMETER( pl, indvQualMin, "--indvQualMin", "Specify minimum depth(inclusive) of a sample to be included in analysis") ADD_PARAMETER_GROUP(pl, "Association Model") 
ADD_STRING_PARAMETER(pl, modelSingle, "--single", "Single variant tests, choose from: score, wald, exact, " "famScore, famLrt, famGrammarGamma, firth") ADD_STRING_PARAMETER(pl, modelBurden, "--burden", "Burden tests, choose from: cmc, zeggini, mb, exactCMC, " "rarecover, cmat, cmcWald") ADD_STRING_PARAMETER(pl, modelVT, "--vt", "Variable threshold tests, choose from: price, analytic") ADD_STRING_PARAMETER( pl, modelKernel, "--kernel", "Kernal-based tests, choose from: SKAT, KBAC, FamSKAT, SKATO") ADD_STRING_PARAMETER(pl, modelMeta, "--meta", "Meta-analysis related functions to generate summary " "statistics, choose from: score, cov, dominant, " "recessive") ADD_PARAMETER_GROUP(pl, "Family-based Models") ADD_STRING_PARAMETER(pl, kinship, "--kinship", "Specify a kinship file for autosomal analysis, use " "vcf2kinship to generate") ADD_STRING_PARAMETER(pl, xHemiKinship, "--xHemiKinship", "Provide kinship for the chromosome X hemizygote region") ADD_STRING_PARAMETER(pl, kinshipEigen, "--kinshipEigen", "Specify eigen decomposition results of a kinship file " "for autosomal analysis") ADD_STRING_PARAMETER( pl, xHemiKinshipEigen, "--xHemiKinshipEigen", "Specify eigen decomposition results of a kinship file for X analysis") ADD_PARAMETER_GROUP(pl, "Grouping Unit ") ADD_STRING_PARAMETER(pl, geneFile, "--geneFile", "Specify a gene file (for burden tests)") ADD_STRING_PARAMETER(pl, gene, "--gene", "Specify which genes to test") ADD_STRING_PARAMETER(pl, setList, "--setList", "Specify a list to test (for burden tests)") ADD_STRING_PARAMETER(pl, setFile, "--setFile", "Specify a list file (for burden tests, first 2 " "columns: setName chr:beg-end)") ADD_STRING_PARAMETER(pl, set, "--set", "Specify which set to test (1st column)") ADD_PARAMETER_GROUP(pl, "Frequency Cutoff") /*ADD_BOOL_PARAMETER(pl, freqFromFile, "--freqFromFile", "Obtain frequency * from external file")*/ // ADD_BOOL_PARAMETER(pl, freqFromControl, "--freqFromControl", "Calculate // frequency from case samples") 
ADD_DOUBLE_PARAMETER( pl, freqUpper, "--freqUpper", "Specify upper minor allele frequency bound to be included in analysis") ADD_DOUBLE_PARAMETER( pl, freqLower, "--freqLower", "Specify lower minor allele frequency bound to be included in analysis") ADD_PARAMETER_GROUP(pl, "Missing Data") ADD_STRING_PARAMETER( pl, impute, "--impute", "Impute missing genotype (default:mean): mean, hwe, and drop") ADD_BOOL_PARAMETER( pl, imputePheno, "--imputePheno", "Impute phenotype to mean of those have genotypes but no phenotypes") ADD_BOOL_PARAMETER(pl, imputeCov, "--imputeCov", "Impute each covariate to its mean, instead of drop " "samples with missing covariates") ADD_PARAMETER_GROUP(pl, "Conditional Analysis") ADD_STRING_PARAMETER(pl, condition, "--condition", "Specify markers to be conditions (specify range)") ADD_PARAMETER_GROUP(pl, "Auxiliary Functions") ADD_BOOL_PARAMETER(pl, noweb, "--noweb", "Skip checking new version") ADD_BOOL_PARAMETER(pl, help, "--help", "Print detailed help message") END_PARAMETER_LIST(pl); pl.Read(argc, argv); if (FLAG_help) { pl.Help(); return 0; } welcome(); pl.Status(); if (FLAG_REMAIN_ARG.size() > 0) { fprintf(stderr, "Unparsed arguments: "); for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) { fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str()); } exit(1); } if (!FLAG_outPrefix.size()) FLAG_outPrefix = "rvtest"; REQUIRE_STRING_PARAMETER(FLAG_inVcf, "Please provide input file using: --inVcf"); // check new version if (!FLAG_noweb) { VersionChecker ver; if (ver.retrieveRemoteVersion("http://zhanxw.com/rvtests/version") < 0) { fprintf(stderr, "Retrieve remote version failed, use '--noweb' to skip.\n"); } else { ver.setLocalVersion(VERSION); if (ver.isRemoteVersionNewer()) { fprintf(stderr, "New version of rvtests is available:"); ver.printRemoteContent(); } } } // start logging Logger _logger((FLAG_outPrefix + ".log").c_str()); logger = &_logger; logger->info("Program version: %s", VERSION); logger->infoToFile("Git Version: %s", 
GIT_VERSION); logger->infoToFile("Parameters BEGIN"); pl.WriteToFile(logger->getHandle()); logger->infoToFile("Parameters END"); logger->sync(); // start analysis time_t startTime = time(0); logger->info("Analysis started at: %s", currentTime().c_str()); GenotypeExtractor ge(FLAG_inVcf); // set range filters here ge.setRangeList(FLAG_rangeList.c_str()); ge.setRangeFile(FLAG_rangeFile.c_str()); // set people filters here if (FLAG_peopleIncludeID.size() || FLAG_peopleIncludeFile.size()) { ge.excludeAllPeople(); ge.includePeople(FLAG_peopleIncludeID.c_str()); ge.includePeopleFromFile(FLAG_peopleIncludeFile.c_str()); } ge.excludePeople(FLAG_peopleExcludeID.c_str()); ge.excludePeopleFromFile(FLAG_peopleExcludeFile.c_str()); if (FLAG_siteDepthMin > 0) { ge.setSiteDepthMin(FLAG_siteDepthMin); logger->info("Set site depth minimum to %d", FLAG_siteDepthMin); } if (FLAG_siteDepthMax > 0) { ge.setSiteDepthMax(FLAG_siteDepthMax); logger->info("Set site depth maximum to %d", FLAG_siteDepthMax); } if (FLAG_siteMACMin > 0) { ge.setSiteMACMin(FLAG_siteMACMin); logger->info("Set site minimum MAC to %d", FLAG_siteDepthMin); } if (FLAG_annoType != "") { ge.setAnnoType(FLAG_annoType.c_str()); logger->info("Set annotype type filter to %s", FLAG_annoType.c_str()); } std::vector<std::string> vcfSampleNames; ge.getPeopleName(&vcfSampleNames); logger->info("Loaded [ %zu ] samples from VCF files", vcfSampleNames.size()); DataLoader dataLoader; dataLoader.setPhenotypeImputation(FLAG_imputePheno); dataLoader.setCovariateImputation(FLAG_imputeCov); if (FLAG_multiplePheno.empty()) { dataLoader.loadPhenotype(FLAG_pheno, FLAG_mpheno, FLAG_phenoName); // // load phenotypes // std::map<std::string, double> phenotype; // if (FLAG_pheno.empty()) { // logger->error("Cannot do association when phenotype is missing!"); // return -1; // } // // check if alternative phenotype columns are used // if (!FLAG_mpheno.empty() && !FLAG_phenoName.empty()) { // logger->error("Please specify either --mpheno or 
--pheno-name"); // return -1; // } // if (!FLAG_mpheno.empty()) { // int col = atoi(FLAG_mpheno); // int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype, // col); // if (ret < 0) { // logger->error("Loading phenotype failed!"); // return -1; // } // } else if (!FLAG_phenoName.empty()) { // int ret = loadPedPhenotypeByHeader(FLAG_pheno.c_str(), &phenotype, // FLAG_phenoName.c_str()); // if (ret < 0) { // logger->error("Loading phenotype failed!"); // return -1; // } // } else { // int col = 1; // default use the first phenotype // int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype, // col); // if (ret < 0) { // logger->error("Loading phenotype failed!"); // return -1; // } // } // logger->info("Loaded [ %zu ] sample pheontypes.", phenotype.size()); // rearrange phenotypes // drop samples from phenotype or vcf matchPhenotypeAndVCF("missing phenotype", &dataLoader, &ge); // // phenotype names (vcf sample names) arranged in the same order as in // VCF // std::vector<std::string> phenotypeNameInOrder; // std::vector<double> // phenotypeInOrder; // phenotype arranged in the same order as in VCF // rearrange(phenotype, vcfSampleNames, &vcfSampleToDrop, // &phenotypeNameInOrder, // &phenotypeInOrder, FLAG_imputePheno); // if (vcfSampleToDrop.size()) { // // exclude this sample from parsing VCF // ge.excludePeople(vcfSampleToDrop); // // output dropped samples // for (size_t i = 0; i < vcfSampleToDrop.size(); ++i) { // if (i == 0) // logger->warn( // "Total [ %zu ] samples are dropped from VCF file due to missing // " // "phenotype", // vcfSampleToDrop.size()); // if (i >= 10) { // logger->warn( // "Skip outputting additional [ %d ] samples with missing " // "phenotypes.", // ((int)vcfSampleToDrop.size() - 10)); // break; // } // logger->warn("Drop sample [ %s ] from VCF file due to missing // phenotype", // (vcfSampleToDrop)[i].c_str()); // } // // logger->warn("Drop %zu sample from VCF file since we don't have // their // // phenotypes", 
vcfSampleToDrop.size()); // } // if (phenotypeInOrder.size() != phenotype.size()) { // logger->warn( // "Drop [ %d ] samples from phenotype file due to missing genotypes // from " // "VCF files", // (int)(phenotype.size() - phenotypeInOrder.size())); // // We may output these samples by comparing keys of phenotype and // // phenotypeNameInOrder // } dataLoader.loadCovariate(FLAG_cov, FLAG_covName); matchCovariateAndVCF("missing covariate", &dataLoader, &ge); // // load covariate // Matrix covariate; // HandleMissingCov handleMissingCov = COVARIATE_DROP; // if (FLAG_imputeCov) { // handleMissingCov = COVARIATE_IMPUTE; // } // if (FLAG_cov.empty() && !FLAG_covName.empty()) { // logger->info("Use phenotype file as covariate file [ %s ]", // FLAG_pheno.c_str()); // FLAG_cov = FLAG_pheno; // } // if (!FLAG_cov.empty()) { // logger->info("Begin to read covariate file."); // std::vector<std::string> columnNamesInCovariate; // std::set<std::string> sampleToDropInCovariate; // int ret = loadCovariate(FLAG_cov.c_str(), phenotypeNameInOrder, // FLAG_covName.c_str(), handleMissingCov, // &covariate, // &columnNamesInCovariate, // &sampleToDropInCovariate); // if (ret < 0) { // logger->error("Load covariate file failed !"); // exit(1); // } // // drop phenotype samples // if (!sampleToDropInCovariate.empty()) { // int idx = 0; // int n = phenotypeNameInOrder.size(); // for (int i = 0; i < n; ++i) { // if (sampleToDropInCovariate.count(phenotypeNameInOrder[i]) != // 0) { // need to drop // continue; // } // phenotypeNameInOrder[idx] = phenotypeNameInOrder[i]; // phenotypeInOrder[idx] = phenotypeInOrder[i]; // idx++; // } // phenotypeNameInOrder.resize(idx); // phenotypeInOrder.resize(idx); // logger->warn( // "[ %zu ] sample phenotypes are dropped due to lacking // covariates.", // sampleToDropInCovariate.size()); // } // // drop vcf samples; // for (std::set<std::string>::const_iterator iter = // sampleToDropInCovariate.begin(); // iter != sampleToDropInCovariate.end(); ++iter) 
{ // ge.excludePeople(iter->c_str()); // } // } } else { dataLoader.loadMultiplePhenotype(FLAG_multiplePheno, FLAG_pheno, FLAG_cov); matchPhenotypeAndVCF("missing phenotype", &dataLoader, &ge); matchCovariateAndVCF("missing covariate", &dataLoader, &ge); } dataLoader.loadSex(); if (FLAG_sex) { dataLoader.useSexAsCovariate(); matchCovariateAndVCF("missing sex", &dataLoader, &ge); } // // load sex // std::vector<int> sex; // if (loadSex(FLAG_pheno, phenotypeNameInOrder, &sex)) { // logger->error("Cannot load sex of samples from phenotype file"); // exit(1); // } // if (FLAG_sex) { // append sex in covariate // std::vector<int> index; // mark missing samples // int numMissing = findMissingSex(sex, &index); // logger->info("Futher exclude %d samples with missing sex", numMissing); // removeByIndex(index, &sex); // excludeSamplesByIndex(index, &ge, &phenotypeNameInOrder, // &phenotypeInOrder, // &covariate); // appendToMatrix("Sex", sex, &covariate); // } if (!FLAG_condition.empty()) { dataLoader.loadMarkerAsCovariate(FLAG_inVcf, FLAG_condition); matchCovariateAndVCF("missing in conditioned marker(s)", &dataLoader, &ge); } // // load conditional markers // if (!FLAG_condition.empty()) { // Matrix geno; // std::vector<std::string> rowLabel; // if (loadMarkerFromVCF(FLAG_inVcf, FLAG_condition, &rowLabel, &geno) < 0) // { // logger->error("Load conditional markers [ %s ] from [ %s ] failed.", // FLAG_condition.c_str(), FLAG_inVcf.c_str()); // exit(1); // } // if (appendGenotype(&covariate, phenotypeNameInOrder, geno, rowLabel) < 0) // { // logger->error( // "Failed to combine conditional markers [ %s ] from [ %s ] failed.", // FLAG_condition.c_str(), FLAG_inVcf.c_str()); // exit(1); // } // } dataLoader.checkConstantCovariate(); // // check if some covariates are constant for all samples // // e.g. 
user may include covariate "1" in addition to intercept // // in such case, we will give a fatal error // for (int i = 0; i < covariate.cols; ++i) { // std::set<double> s; // s.clear(); // for (int j = 0; j < covariate.rows; ++j) { // s.insert(covariate[j][i]); // } // if (s.size() == 1) { // logger->error( // "Covariate [ %s ] equals [ %g ] for all samples, cannot fit " // "model...\n", // covariate.GetColumnLabel(i), *s.begin()); // exit(1); // } // } g_SummaryHeader = new SummaryHeader; g_SummaryHeader->recordCovariate(dataLoader.getCovariate()); // record raw phenotype g_SummaryHeader->recordPhenotype("Trait", dataLoader.getPhenotype().extractCol(0)); // adjust phenotype // bool binaryPhenotype; if (FLAG_qtl) { // binaryPhenotype = false; dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL); logger->info("-- Force quantitative trait mode -- "); } else { if (dataLoader.detectPhenotypeType() == DataLoader::PHENOTYPE_BINARY) { logger->warn("-- Enabling binary phenotype mode -- "); dataLoader.setTraitType(DataLoader::PHENOTYPE_BINARY); } else { dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL); } // binaryPhenotype = isBinaryPhenotype(phenotypeInOrder); // if (binaryPhenotype) { // logger->warn("-- Enabling binary phenotype mode -- "); // convertBinaryPhenotype(&phenotypeInOrder); // } } if (FLAG_useResidualAsPhenotype) { dataLoader.useResidualAsPhenotype(); g_SummaryHeader->recordEstimation(dataLoader.getEstimation()); } // // use residual as phenotype // if (FLAG_useResidualAsPhenotype) { // if (binaryPhenotype) { // logger->warn( // "WARNING: Skip transforming binary phenotype, although you want to // " // "use residual as phenotype!"); // } else { // if (covariate.cols > 0) { // LinearRegression lr; // Vector pheno; // Matrix covAndInt; // copy(phenotypeInOrder, &pheno); // copyCovariateAndIntercept(covariate.rows, covariate, &covAndInt); // if (!lr.FitLinearModel(covAndInt, pheno)) { // logger->error( // "Cannot fit model: [ phenotype ~ 1 + covariates ], now 
use the // " // "original phenotype"); // } else { // const int n = lr.GetResiduals().Length(); // for (int i = 0; i < n; ++i) { // phenotypeInOrder[i] = lr.GetResiduals()[i]; // } // covariate.Dimension(0, 0); // logger->info( // "DONE: Fit model [ phenotype ~ 1 + covariates ] and model " // "residuals will be used as responses."); // } // } else { // no covaraites // centerVector(&phenotypeInOrder); // logger->info("DONE: Use residual as phenotype by centerng it"); // } // } // } if (FLAG_inverseNormal) { dataLoader.inverseNormalizePhenotype(); g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal); } // // phenotype transformation // if (FLAG_inverseNormal) { // if (binaryPhenotype) { // logger->warn( // "WARNING: Skip transforming binary phenotype, although you required // " // "inverse normalization!"); // } else { // logger->info("Now applying inverse normalize transformation."); // inverseNormalizeLikeMerlin(&phenotypeInOrder); // g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal); // logger->info("DONE: inverse normal transformation finished."); // } // } g_SummaryHeader->recordPhenotype("AnalyzedTrait", dataLoader.getPhenotype().extractCol(0)); if (dataLoader.getPhenotype().nrow() == 0) { logger->fatal("There are 0 samples with valid phenotypes, quitting..."); exit(1); } // if (phenotypeInOrder.empty()) { // logger->fatal("There are 0 samples with valid phenotypes, quitting..."); // exit(1); // } logger->info("Analysis begins with [ %d ] samples...", dataLoader.getPhenotype().nrow()); ////////////////////////////////////////////////////////////////////////////// // prepare each model bool singleVariantMode = FLAG_modelSingle.size() || FLAG_modelMeta.size(); bool groupVariantMode = (FLAG_modelBurden.size() || FLAG_modelVT.size() || FLAG_modelKernel.size()); if (singleVariantMode && groupVariantMode) { logger->error("Cannot support both single variant and region based tests"); exit(1); } ModelManager modelManager(FLAG_outPrefix); // set up models in 
qtl/binary modes if (dataLoader.isBinaryPhenotype()) { modelManager.setBinaryOutcome(); matchPhenotypeAndVCF("missing phenotype (not case/control)", &dataLoader, &ge); } else { modelManager.setQuantitativeOutcome(); } // create models modelManager.create("single", FLAG_modelSingle); modelManager.create("burden", FLAG_modelBurden); modelManager.create("vt", FLAG_modelVT); modelManager.create("kernel", FLAG_modelKernel); modelManager.create("meta", FLAG_modelMeta); if (FLAG_outputRaw) { modelManager.create("outputRaw", "dump"); } const std::vector<ModelFitter*>& model = modelManager.getModel(); const std::vector<FileWriter*>& fOuts = modelManager.getResultFile(); const size_t numModel = model.size(); // TODO: optimize this by avoidding data copying Matrix phenotypeMatrix; Matrix covariate; toMatrix(dataLoader.getPhenotype(), &phenotypeMatrix); toMatrix(dataLoader.getCovariate(), &covariate); // determine VCF file reading pattern // current support: // * line by line ( including range selection) // * gene by gene // * range by range std::string rangeMode = "Single"; if (FLAG_geneFile.size() && (FLAG_setFile.size() || FLAG_setList.size())) { logger->error("Cannot specify both gene file and set file."); exit(1); } if (!FLAG_gene.empty() && FLAG_geneFile.empty()) { logger->error("Please provide gene file for gene bases analysis."); exit(1); } OrderedMap<std::string, RangeList> geneRange; if (FLAG_geneFile.size()) { rangeMode = "Gene"; int ret = loadGeneFile(FLAG_geneFile.c_str(), FLAG_gene.c_str(), &geneRange); if (ret < 0 || geneRange.size() == 0) { logger->error("Error loading gene file or gene list is empty!"); return -1; } else { logger->info("Loaded [ %zu ] genes.", geneRange.size()); } } if (!FLAG_set.empty() && FLAG_setFile.empty()) { logger->error("Please provide set file for set bases analysis."); exit(1); } if (FLAG_setFile.size()) { rangeMode = "Range"; int ret = loadRangeFile(FLAG_setFile.c_str(), FLAG_set.c_str(), &geneRange); if (ret < 0 || geneRange.size() 
== 0) { logger->error("Error loading set file or set list is empty!"); return -1; } else { logger->info("Loaded [ %zu ] set to tests.", geneRange.size()); } } if (FLAG_setList.size()) { rangeMode = "Range"; int ret = appendListToRange(FLAG_setList, &geneRange); if (ret < 0) { logger->error("Error loading set list or set list is empty!"); return -1; } } DataConsolidator dc; dc.setSex(&dataLoader.getSex()); dc.setFormula(&dataLoader.getFormula()); dc.setGenotypeCounter(ge.getGenotypeCounter()); // load kinshp if needed by family models if (modelManager.hasFamilyModel() || (!FLAG_modelMeta.empty() && !FLAG_kinship.empty())) { logger->info("Family-based model specified. Loading kinship file..."); // process auto kinship if (dc.setKinshipSample(dataLoader.getPhenotype().getRowName()) || dc.setKinshipFile(DataConsolidator::KINSHIP_AUTO, FLAG_kinship) || dc.setKinshipEigenFile(DataConsolidator::KINSHIP_AUTO, FLAG_kinshipEigen) || dc.loadKinship(DataConsolidator::KINSHIP_AUTO)) { logger->error( "Failed to load autosomal kinship (you may use vcf2kinship to " "generate one)."); exit(1); } if (dc.setKinshipFile(DataConsolidator::KINSHIP_X, FLAG_xHemiKinship) || dc.setKinshipEigenFile(DataConsolidator::KINSHIP_X, FLAG_xHemiKinshipEigen) || dc.loadKinship(DataConsolidator::KINSHIP_X)) { logger->warn( "Autosomal kinship loaded, but no hemizygote region kinship " "provided, some sex chromosome tests will be skipped."); // keep the program going } } else if (!FLAG_kinship.empty() && FLAG_modelMeta.empty()) { logger->info( "Family-based model not specified. 
Options related to kinship will be " "ignored here."); } // set imputation method if (FLAG_impute.empty()) { logger->info("Impute missing genotype to mean (by default)"); dc.setStrategy(DataConsolidator::IMPUTE_MEAN); } else if (FLAG_impute == "mean") { logger->info("Impute missing genotype to mean"); dc.setStrategy(DataConsolidator::IMPUTE_MEAN); } else if (FLAG_impute == "hwe") { logger->info("Impute missing genotype by HWE"); dc.setStrategy(DataConsolidator::IMPUTE_HWE); } else if (FLAG_impute == "drop") { logger->info("Drop missing genotypes"); dc.setStrategy(DataConsolidator::DROP); } dc.setPhenotypeName(dataLoader.getPhenotype().getRowName()); // set up par region ParRegion parRegion(FLAG_xLabel, FLAG_xParRegion); dc.setParRegion(&parRegion); // genotype will be extracted and stored Matrix& genotype = dc.getOriginalGenotype(); if (FLAG_freqUpper > 0) { ge.setSiteFreqMax(FLAG_freqUpper); logger->info("Set upper minor allele frequency limit to %g", FLAG_freqUpper); } if (FLAG_freqLower > 0) { ge.setSiteFreqMin(FLAG_freqLower); logger->info("Set lower minor allele frequency limit to %g", FLAG_freqLower); } // handle sex chromosome ge.setParRegion(&parRegion); ge.setSex(&dataLoader.getSex()); // use dosage instead GT if (!FLAG_dosageTag.empty()) { ge.setDosageTag(FLAG_dosageTag); logger->info("Use dosage genotype from VCF flag %s.", FLAG_dosageTag.c_str()); } // genotype QC options if (FLAG_indvDepthMin > 0) { ge.setGDmin(FLAG_indvDepthMin); logger->info("Minimum GD set to %d (or marked as missing genotype).", FLAG_indvDepthMin); } if (FLAG_indvDepthMax > 0) { ge.setGDmax(FLAG_indvDepthMax); logger->info("Maximum GD set to %d (or marked as missing genotype).", FLAG_indvDepthMax); } if (FLAG_indvQualMin > 0) { ge.setGQmin(FLAG_indvQualMin); logger->info("Minimum GQ set to %d (or marked as missing genotype).", FLAG_indvQualMin); } dc.preRegressionCheck(phenotypeMatrix, covariate); logger->info("Analysis started"); Result& buf = dc.getResult(); // we have three 
modes: // * single variant reading, single variant test // * range variant reading, single variant test // * range variant reading, group variant test if (rangeMode == "Single" && singleVariantMode) { // use line by line mode buf.addHeader("CHROM"); buf.addHeader("POS"); buf.addHeader("REF"); buf.addHeader("ALT"); buf.addHeader("N_INFORMATIVE"); // output headers for (size_t m = 0; m < model.size(); m++) { model[m]->writeHeader(fOuts[m], buf); } int variantProcessed = 0; while (true) { buf.clearValue(); int ret = ge.extractSingleGenotype(&genotype, &buf); if (ret == GenotypeExtractor::FILE_END) { // reach file end break; } if (ret == GenotypeExtractor::FAIL_FILTER) { continue; } if (ret != GenotypeExtractor::SUCCEED) { logger->error("Extract genotype failed at site: %s:%s!", buf["CHROM"].c_str(), buf["POS"].c_str()); continue; } if (genotype.cols == 0) { logger->warn("Extract [ %s:%s ] has 0 variants, skipping", buf["CHROM"].c_str(), buf["POS"].c_str()); continue; } ++variantProcessed; dc.consolidate(phenotypeMatrix, covariate, genotype); buf.updateValue("N_INFORMATIVE", toString(genotype.rows)); // fit each model for (size_t m = 0; m != numModel; m++) { model[m]->reset(); model[m]->fit(&dc); model[m]->writeOutput(fOuts[m], buf); } } logger->info("Analyzed [ %d ] variants", variantProcessed); } else if (rangeMode != "Single" && singleVariantMode) { // read by gene/range model, single variant // test buf.addHeader(rangeMode); buf.addHeader("CHROM"); buf.addHeader("POS"); buf.addHeader("REF"); buf.addHeader("ALT"); buf.addHeader("N_INFORMATIVE"); // output headers for (size_t m = 0; m < numModel; m++) { model[m]->writeHeader(fOuts[m], buf); } std::string geneName; RangeList rangeList; int variantProcessed = 0; for (size_t i = 0; i < geneRange.size(); ++i) { geneRange.at(i, &geneName, &rangeList); ge.setRange(rangeList); while (true) { buf.clearValue(); int ret = ge.extractSingleGenotype(&genotype, &buf); if (ret == GenotypeExtractor::FILE_END) { // reach end of this 
region break; } if (ret == GenotypeExtractor::FAIL_FILTER) { continue; } if (ret != GenotypeExtractor::SUCCEED) { logger->error("Extract genotype failed for gene %s!", geneName.c_str()); continue; } if (genotype.cols == 0) { logger->warn("Gene %s has 0 variants, skipping", geneName.c_str()); continue; } ++variantProcessed; dc.consolidate(phenotypeMatrix, covariate, genotype); buf.updateValue(rangeMode, geneName); buf.updateValue("N_INFORMATIVE", genotype.rows); // #pragma omp parallel for for (size_t m = 0; m != numModel; m++) { model[m]->reset(); model[m]->fit(&dc); model[m]->writeOutput(fOuts[m], buf); } } } logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions", variantProcessed, (int)geneRange.size()); } else if (rangeMode != "Single" && groupVariantMode) { // read by gene/range mode, group variant // test buf.addHeader(rangeMode); buf.addHeader("RANGE"); buf.addHeader("N_INFORMATIVE"); buf.addHeader("NumVar"); buf.addHeader("NumPolyVar"); // output headers for (size_t m = 0; m < numModel; m++) { model[m]->writeHeader(fOuts[m], buf); } std::string geneName; RangeList rangeList; int variantProcessed = 0; ge.enableAutoMerge(); for (size_t i = 0; i < geneRange.size(); ++i) { geneRange.at(i, &geneName, &rangeList); ge.setRange(rangeList); buf.clearValue(); int ret = ge.extractMultipleGenotype(&genotype); if (ret != GenotypeExtractor::SUCCEED) { logger->error("Extract genotype failed for gene %s!", geneName.c_str()); continue; } if (genotype.cols == 0) { logger->info("Gene %s has 0 variants, skipping", geneName.c_str()); continue; } variantProcessed += genotype.cols; // genotype is people by marker dc.consolidate(phenotypeMatrix, covariate, genotype); buf.updateValue(rangeMode, geneName); buf.updateValue("RANGE", rangeList.toString()); buf.updateValue("N_INFORMATIVE", genotype.rows); buf.updateValue("NumVar", genotype.cols); buf.updateValue("NumPolyVar", dc.getFlippedToMinorPolymorphicGenotype().cols); // #ifdef _OPENMP // #pragma omp parallel for // 
#endif for (size_t m = 0; m != numModel; m++) { model[m]->reset(); model[m]->fit(&dc); model[m]->writeOutput(fOuts[m], buf); } } logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions", variantProcessed, (int)geneRange.size()); } else { logger->error( "Unsupported reading mode and test modes! (need more parameters?)"); exit(1); } // Resource cleaning up modelManager.close(); delete g_SummaryHeader; time_t endTime = time(0); logger->info("Analysis ends at: %s", currentTime().c_str()); int elapsedSecond = (int)(endTime - startTime); logger->info("Analysis took %d seconds", elapsedSecond); return 0; }
int main(int argc, char** argv) { PARSE_PARAMETER(argc, argv); if (FLAG_help) { PARAMETER_HELP(); return 0; } welcome(); PARAMETER_STATUS(); if (FLAG_REMAIN_ARG.size() > 0) { fprintf(stderr, "Unparsed arguments: "); for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) { fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str()); } exit(1); } if (!FLAG_outPrefix.size()) FLAG_outPrefix = "rvtest"; if ((FLAG_inVcf.empty() ? 0 : 1) + (FLAG_inBgen.empty() ? 0 : 1) + (FLAG_inKgg.empty() ? 0 : 1) != 1) { fprintf(stderr, "Please provide one type of input file using: --inVcf, --inBgen or " "--inKgg\n"); exit(1); } // check new version if (!FLAG_noweb) { VersionChecker ver; if (ver.retrieveRemoteVersion("http://zhanxw.com/rvtests/version") < 0) { fprintf(stderr, "Retrieve remote version failed, use '--noweb' to skip.\n"); } else { ver.setLocalVersion(VERSION); if (ver.isRemoteVersionNewer()) { fprintf(stderr, "New version of rvtests is available:"); ver.printRemoteContent(); } } } // start logging Logger _logger((FLAG_outPrefix + ".log").c_str()); logger = &_logger; logger->info("Program version: %s", VERSION); logger->infoToFile("Git Version: %s", GIT_VERSION); logger->infoToFile("Parameters BEGIN"); PARAMETER_INSTANCE().WriteToFile(logger->getHandle()); logger->infoToFile("Parameters END"); logger->sync(); // set up multithreading #ifdef _OPENMP if (FLAG_numThread <= 0) { fprintf(stderr, "Invalid number of threads [ %d ], reset to single thread", FLAG_numThread); omp_set_num_threads(1); } else if (FLAG_numThread > omp_get_max_threads()) { int maxThreads = omp_get_max_threads(); fprintf(stderr, "Reduced your specified number of threads to the maximum of system " "limit [ %d ]", maxThreads); omp_set_num_threads(maxThreads); } else if (FLAG_numThread == 1) { // need to set to one thread, otherwise all CPUs may be used omp_set_num_threads(1); } else { logger->info("Set number of threads = [ %d ]", FLAG_numThread); omp_set_num_threads(FLAG_numThread); } #endif // start analysis 
time_t startTime = time(0); logger->info("Analysis started at: %s", currentTime().c_str()); GenotypeExtractor* ge = NULL; if (!FLAG_inVcf.empty()) { ge = new VCFGenotypeExtractor(FLAG_inVcf); } else if (!FLAG_inBgen.empty()) { ge = new BGenGenotypeExtractor(FLAG_inBgen, FLAG_inBgenSample); } else if (!FLAG_inKgg.empty()) { ge = new KGGGenotypeExtractor(FLAG_inKgg); } else { assert(false); } // set range filters here ge->setRangeList(FLAG_rangeList.c_str()); ge->setRangeFile(FLAG_rangeFile.c_str()); // set people filters here if (FLAG_peopleIncludeID.size() || FLAG_peopleIncludeFile.size()) { ge->excludeAllPeople(); ge->includePeople(FLAG_peopleIncludeID.c_str()); ge->includePeopleFromFile(FLAG_peopleIncludeFile.c_str()); } ge->excludePeople(FLAG_peopleExcludeID.c_str()); ge->excludePeopleFromFile(FLAG_peopleExcludeFile.c_str()); if (!FLAG_siteFile.empty()) { ge->setSiteFile(FLAG_siteFile); logger->info("Restrict analysis based on specified site file [ %s ]", FLAG_siteFile.c_str()); } if (FLAG_siteDepthMin > 0) { ge->setSiteDepthMin(FLAG_siteDepthMin); logger->info("Set site depth minimum to %d", FLAG_siteDepthMin); } if (FLAG_siteDepthMax > 0) { ge->setSiteDepthMax(FLAG_siteDepthMax); logger->info("Set site depth maximum to %d", FLAG_siteDepthMax); } if (FLAG_siteMACMin > 0) { ge->setSiteMACMin(FLAG_siteMACMin); logger->info("Set site minimum MAC to %d", FLAG_siteDepthMin); } if (FLAG_annoType != "") { ge->setAnnoType(FLAG_annoType.c_str()); logger->info("Set annotype type filter to %s", FLAG_annoType.c_str()); } std::vector<std::string> vcfSampleNames; ge->getPeopleName(&vcfSampleNames); logger->info("Loaded [ %zu ] samples from genotype files", vcfSampleNames.size()); DataLoader dataLoader; dataLoader.setPhenotypeImputation(FLAG_imputePheno); dataLoader.setCovariateImputation(FLAG_imputeCov); if (FLAG_multiplePheno.empty()) { dataLoader.loadPhenotype(FLAG_pheno, FLAG_mpheno, FLAG_phenoName); // // load phenotypes // std::map<std::string, double> phenotype; // if 
(FLAG_pheno.empty()) { // logger->error("Cannot do association when phenotype is missing!"); // return -1; // } // // check if alternative phenotype columns are used // if (!FLAG_mpheno.empty() && !FLAG_phenoName.empty()) { // logger->error("Please specify either --mpheno or --pheno-name"); // return -1; // } // if (!FLAG_mpheno.empty()) { // int col = atoi(FLAG_mpheno); // int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype, // col); // if (ret < 0) { // logger->error("Loading phenotype failed!"); // return -1; // } // } else if (!FLAG_phenoName.empty()) { // int ret = loadPedPhenotypeByHeader(FLAG_pheno.c_str(), &phenotype, // FLAG_phenoName.c_str()); // if (ret < 0) { // logger->error("Loading phenotype failed!"); // return -1; // } // } else { // int col = 1; // default use the first phenotype // int ret = loadPedPhenotypeByColumn(FLAG_pheno.c_str(), &phenotype, // col); // if (ret < 0) { // logger->error("Loading phenotype failed!"); // return -1; // } // } // logger->info("Loaded [ %zu ] sample phenotypes.", phenotype.size()); // rearrange phenotypes // drop samples from phenotype or vcf matchPhenotypeAndVCF("missing phenotype", &dataLoader, ge); // // phenotype names (vcf sample names) arranged in the same order as in // VCF // std::vector<std::string> phenotypeNameInOrder; // std::vector<double> // phenotypeInOrder; // phenotype arranged in the same order as in VCF // rearrange(phenotype, vcfSampleNames, &vcfSampleToDrop, // &phenotypeNameInOrder, // &phenotypeInOrder, FLAG_imputePheno); // if (vcfSampleToDrop.size()) { // // exclude this sample from parsing VCF // ge->excludePeople(vcfSampleToDrop); // // output dropped samples // for (size_t i = 0; i < vcfSampleToDrop.size(); ++i) { // if (i == 0) // logger->warn( // "Total [ %zu ] samples are dropped from VCF file due to missing // " // "phenotype", // vcfSampleToDrop.size()); // if (i >= 10) { // logger->warn( // "Skip outputting additional [ %d ] samples with missing " // "phenotypes.", 
// ((int)vcfSampleToDrop.size() - 10)); // break; // } // logger->warn("Drop sample [ %s ] from VCF file due to missing // phenotype", // (vcfSampleToDrop)[i].c_str()); // } // // logger->warn("Drop %zu sample from VCF file since we don't have // their // // phenotypes", vcfSampleToDrop.size()); // } // if (phenotypeInOrder.size() != phenotype.size()) { // logger->warn( // "Drop [ %d ] samples from phenotype file due to missing genotypes // from " // "VCF files", // (int)(phenotype.size() - phenotypeInOrder.size())); // // We may output these samples by comparing keys of phenotype and // // phenotypeNameInOrder // } dataLoader.loadCovariate(FLAG_cov, FLAG_covName); matchCovariateAndVCF("missing covariate", &dataLoader, ge); // // load covariate // Matrix covariate; // HandleMissingCov handleMissingCov = COVARIATE_DROP; // if (FLAG_imputeCov) { // handleMissingCov = COVARIATE_IMPUTE; // } // if (FLAG_cov.empty() && !FLAG_covName.empty()) { // logger->info("Use phenotype file as covariate file [ %s ]", // FLAG_pheno.c_str()); // FLAG_cov = FLAG_pheno; // } // if (!FLAG_cov.empty()) { // logger->info("Begin to read covariate file."); // std::vector<std::string> columnNamesInCovariate; // std::set<std::string> sampleToDropInCovariate; // int ret = loadCovariate(FLAG_cov.c_str(), phenotypeNameInOrder, // FLAG_covName.c_str(), handleMissingCov, // &covariate, // &columnNamesInCovariate, // &sampleToDropInCovariate); // if (ret < 0) { // logger->error("Load covariate file failed !"); // exit(1); // } // // drop phenotype samples // if (!sampleToDropInCovariate.empty()) { // int idx = 0; // int n = phenotypeNameInOrder.size(); // for (int i = 0; i < n; ++i) { // if (sampleToDropInCovariate.count(phenotypeNameInOrder[i]) != // 0) { // need to drop // continue; // } // phenotypeNameInOrder[idx] = phenotypeNameInOrder[i]; // phenotypeInOrder[idx] = phenotypeInOrder[i]; // idx++; // } // phenotypeNameInOrder.resize(idx); // phenotypeInOrder.resize(idx); // logger->warn( // "[ 
%zu ] sample phenotypes are dropped due to lacking // covariates.", // sampleToDropInCovariate.size()); // } // // drop vcf samples; // for (std::set<std::string>::const_iterator iter = // sampleToDropInCovariate.begin(); // iter != sampleToDropInCovariate.end(); ++iter) { // ge->excludePeople(iter->c_str()); // } // } } else { dataLoader.loadMultiplePhenotype(FLAG_multiplePheno, FLAG_pheno, FLAG_cov); matchPhenotypeAndVCF("missing phenotype", &dataLoader, ge); matchCovariateAndVCF("missing covariate", &dataLoader, ge); } dataLoader.loadSex(); if (FLAG_sex) { dataLoader.useSexAsCovariate(); matchCovariateAndVCF("missing sex", &dataLoader, ge); } // // load sex // std::vector<int> sex; // if (loadSex(FLAG_pheno, phenotypeNameInOrder, &sex)) { // logger->error("Cannot load sex of samples from phenotype file"); // exit(1); // } // if (FLAG_sex) { // append sex in covariate // std::vector<int> index; // mark missing samples // int numMissing = findMissingSex(sex, &index); // logger->info("Futher exclude %d samples with missing sex", numMissing); // removeByIndex(index, &sex); // excludeSamplesByIndex(index, &ge, &phenotypeNameInOrder, // &phenotypeInOrder, // &covariate); // appendToMatrix("Sex", sex, &covariate); // } if (!FLAG_condition.empty()) { dataLoader.loadMarkerAsCovariate(FLAG_inVcf, FLAG_condition); matchCovariateAndVCF("missing in conditioned marker(s)", &dataLoader, ge); } // // load conditional markers // if (!FLAG_condition.empty()) { // Matrix geno; // std::vector<std::string> rowLabel; // if (loadMarkerFromVCF(FLAG_inVcf, FLAG_condition, &rowLabel, &geno) < 0) // { // logger->error("Load conditional markers [ %s ] from [ %s ] failed.", // FLAG_condition.c_str(), FLAG_inVcf.c_str()); // exit(1); // } // if (appendGenotype(&covariate, phenotypeNameInOrder, geno, rowLabel) < 0) // { // logger->error( // "Failed to combine conditional markers [ %s ] from [ %s ] failed.", // FLAG_condition.c_str(), FLAG_inVcf.c_str()); // exit(1); // } // } 
dataLoader.checkConstantCovariate(); // // check if some covariates are constant for all samples // // e.g. user may include covariate "1" in addition to intercept // // in such case, we will give a fatal error // for (int i = 0; i < covariate.cols; ++i) { // std::set<double> s; // s.clear(); // for (int j = 0; j < covariate.rows; ++j) { // s.insert(covariate(j,i)); // } // if (s.size() == 1) { // logger->error( // "Covariate [ %s ] equals [ %g ] for all samples, cannot fit " // "model...\n", // covariate.GetColumnLabel(i), *s.begin()); // exit(1); // } // } g_SummaryHeader = new SummaryHeader; g_SummaryHeader->recordCovariate(dataLoader.getCovariate()); // record raw phenotype g_SummaryHeader->recordPhenotype("Trait", dataLoader.getPhenotype().extractCol(0)); // adjust phenotype // bool binaryPhenotype; if (FLAG_qtl) { // binaryPhenotype = false; dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL); logger->info("-- Force quantitative trait mode -- "); } else { if (dataLoader.detectPhenotypeType() == DataLoader::PHENOTYPE_BINARY) { logger->warn("-- Enabling binary phenotype mode -- "); dataLoader.setTraitType(DataLoader::PHENOTYPE_BINARY); } else { dataLoader.setTraitType(DataLoader::PHENOTYPE_QTL); } // binaryPhenotype = isBinaryPhenotype(phenotypeInOrder); // if (binaryPhenotype) { // logger->warn("-- Enabling binary phenotype mode -- "); // convertBinaryPhenotype(&phenotypeInOrder); // } } if (FLAG_useResidualAsPhenotype) { dataLoader.useResidualAsPhenotype(); g_SummaryHeader->recordEstimation(dataLoader.getEstimation()); } // // use residual as phenotype // if (FLAG_useResidualAsPhenotype) { // if (binaryPhenotype) { // logger->warn( // "WARNING: Skip transforming binary phenotype, although you want to // " // "use residual as phenotype!"); // } else { // if (covariate.cols > 0) { // LinearRegression lr; // Vector pheno; // Matrix covAndInt; // copy(phenotypeInOrder, &pheno); // copyCovariateAndIntercept(covariate.rows, covariate, &covAndInt); // if 
(!lr.FitLinearModel(covAndInt, pheno)) { // logger->error( // "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the // " // "original phenotype"); // } else { // const int n = lr.GetResiduals().Length(); // for (int i = 0; i < n; ++i) { // phenotypeInOrder[i] = lr.GetResiduals()[i]; // } // covariate.Dimension(0, 0); // logger->info( // "DONE: Fit model [ phenotype ~ 1 + covariates ] and model " // "residuals will be used as responses."); // } // } else { // no covaraites // centerVector(&phenotypeInOrder); // logger->info("DONE: Use residual as phenotype by centerng it"); // } // } // } if (FLAG_inverseNormal) { dataLoader.inverseNormalizePhenotype(); g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal); } // // phenotype transformation // if (FLAG_inverseNormal) { // if (binaryPhenotype) { // logger->warn( // "WARNING: Skip transforming binary phenotype, although you required // " // "inverse normalization!"); // } else { // logger->info("Now applying inverse normalize transformation."); // inverseNormalizeLikeMerlin(&phenotypeInOrder); // g_SummaryHeader->setInverseNormalize(FLAG_inverseNormal); // logger->info("DONE: inverse normalization transformation finished."); // } // } g_SummaryHeader->recordPhenotype("AnalyzedTrait", dataLoader.getPhenotype().extractCol(0)); if (dataLoader.getPhenotype().nrow() == 0) { logger->fatal("There are 0 samples with valid phenotypes, quitting..."); exit(1); } // if (phenotypeInOrder.empty()) { // logger->fatal("There are 0 samples with valid phenotypes, quitting..."); // exit(1); // } logger->info("Analysis begins with [ %d ] samples...", dataLoader.getPhenotype().nrow()); ////////////////////////////////////////////////////////////////////////////// // prepare each model bool singleVariantMode = FLAG_modelSingle.size() || FLAG_modelMeta.size(); bool groupVariantMode = (FLAG_modelBurden.size() || FLAG_modelVT.size() || FLAG_modelKernel.size()); if (singleVariantMode && groupVariantMode) { logger->error("Cannot 
support both single variant and region based tests"); exit(1); } ModelManager modelManager(FLAG_outPrefix); // set up models in qtl/binary modes if (dataLoader.isBinaryPhenotype()) { modelManager.setBinaryOutcome(); matchPhenotypeAndVCF("missing phenotype (not case/control)", &dataLoader, ge); } else { modelManager.setQuantitativeOutcome(); } // create models modelManager.create("single", FLAG_modelSingle); modelManager.create("burden", FLAG_modelBurden); modelManager.create("vt", FLAG_modelVT); modelManager.create("kernel", FLAG_modelKernel); modelManager.create("meta", FLAG_modelMeta); if (FLAG_outputRaw) { modelManager.create("outputRaw", "dump"); } const std::vector<ModelFitter*>& model = modelManager.getModel(); const std::vector<FileWriter*>& fOuts = modelManager.getResultFile(); const size_t numModel = model.size(); // TODO: optimize this to avoid data copying Matrix phenotypeMatrix; Matrix covariate; toMatrix(dataLoader.getPhenotype(), &phenotypeMatrix); toMatrix(dataLoader.getCovariate(), &covariate); // determine VCF file reading pattern // current support: // * line by line ( including range selection) // * gene by gene // * range by range std::string rangeMode = "Single"; if (FLAG_geneFile.size() && (FLAG_setFile.size() || FLAG_setList.size())) { logger->error("Cannot specify both gene file and set file."); exit(1); } if (!FLAG_gene.empty() && FLAG_geneFile.empty()) { logger->error("Please provide gene file for gene bases analysis."); exit(1); } OrderedMap<std::string, RangeList> geneRange; if (FLAG_geneFile.size()) { rangeMode = "Gene"; int ret = loadGeneFile(FLAG_geneFile.c_str(), FLAG_gene.c_str(), &geneRange); if (ret < 0 || geneRange.size() == 0) { logger->error("Error loading gene file or gene list is empty!"); return -1; } else { logger->info("Loaded [ %zu ] genes.", geneRange.size()); } } if (!FLAG_set.empty() && FLAG_setFile.empty()) { logger->error("Please provide set file for set bases analysis."); exit(1); } if (FLAG_setFile.size()) { 
rangeMode = "Range"; int ret = loadRangeFile(FLAG_setFile.c_str(), FLAG_set.c_str(), &geneRange); if (ret < 0 || geneRange.size() == 0) { logger->error("Error loading set file or set list is empty!"); return -1; } else { logger->info("Loaded [ %zu ] set to tests.", geneRange.size()); } } if (FLAG_setList.size()) { rangeMode = "Range"; int ret = appendListToRange(FLAG_setList, &geneRange); if (ret < 0) { logger->error("Error loading set list or set list is empty!"); return -1; } } DataConsolidator dc; dc.setSex(&dataLoader.getSex()); dc.setFormula(&dataLoader.getFormula()); dc.setGenotypeCounter(ge->getGenotypeCounter()); // load kinshp if needed by family models if (modelManager.hasFamilyModel() || (!FLAG_modelMeta.empty() && !FLAG_kinship.empty())) { logger->info("Family-based model specified. Loading kinship file..."); // process auto kinship if (dc.setKinshipSample(dataLoader.getPhenotype().getRowName()) || dc.setKinshipFile(DataConsolidator::KINSHIP_AUTO, FLAG_kinship) || dc.setKinshipEigenFile(DataConsolidator::KINSHIP_AUTO, FLAG_kinshipEigen) || dc.loadKinship(DataConsolidator::KINSHIP_AUTO)) { logger->error( "Failed to load autosomal kinship (you may use vcf2kinship to " "generate one)."); exit(1); } if (dc.setKinshipFile(DataConsolidator::KINSHIP_X, FLAG_xHemiKinship) || dc.setKinshipEigenFile(DataConsolidator::KINSHIP_X, FLAG_xHemiKinshipEigen) || dc.loadKinship(DataConsolidator::KINSHIP_X)) { logger->warn( "Autosomal kinship loaded, but no hemizygote region kinship " "provided, some sex chromosome tests will be skipped."); // keep the program going } } else if (!FLAG_kinship.empty() && FLAG_modelMeta.empty()) { logger->info( "Family-based model not specified. 
Options related to kinship will be " "ignored here."); } // set imputation method if (FLAG_impute.empty()) { logger->info("Impute missing genotype to mean (by default)"); dc.setStrategy(DataConsolidator::IMPUTE_MEAN); } else if (FLAG_impute == "mean") { logger->info("Impute missing genotype to mean"); dc.setStrategy(DataConsolidator::IMPUTE_MEAN); } else if (FLAG_impute == "hwe") { logger->info("Impute missing genotype by HWE"); dc.setStrategy(DataConsolidator::IMPUTE_HWE); } else if (FLAG_impute == "drop") { logger->info("Drop missing genotypes"); dc.setStrategy(DataConsolidator::DROP); } dc.setPhenotypeName(dataLoader.getPhenotype().getRowName()); // set up par region ParRegion parRegion(FLAG_xLabel, FLAG_xParRegion); dc.setParRegion(&parRegion); // genotype will be extracted and stored if (FLAG_freqUpper > 0) { ge->setSiteFreqMax(FLAG_freqUpper); logger->info("Set upper minor allele frequency limit to %g", FLAG_freqUpper); } if (FLAG_freqLower > 0) { ge->setSiteFreqMin(FLAG_freqLower); logger->info("Set lower minor allele frequency limit to %g", FLAG_freqLower); } // handle sex chromosome ge->setParRegion(&parRegion); ge->setSex(&dataLoader.getSex()); // use dosage instead GT if (!FLAG_dosageTag.empty()) { ge->setDosageTag(FLAG_dosageTag); logger->info("Use dosage genotype from VCF flag %s.", FLAG_dosageTag.c_str()); } // multi-allelic sites will be treats as ref/alt1, ref/alt2, ref/alt3.. 
// instead of ref/alt1 (biallelic) if (FLAG_multiAllele) { ge->enableMultiAllelicMode(); logger->info("Enable analysis using multiple allelic models"); } // genotype QC options if (FLAG_indvDepthMin > 0) { ge->setGDmin(FLAG_indvDepthMin); logger->info("Minimum GD set to %d (or marked as missing genotype).", FLAG_indvDepthMin); } if (FLAG_indvDepthMax > 0) { ge->setGDmax(FLAG_indvDepthMax); logger->info("Maximum GD set to %d (or marked as missing genotype).", FLAG_indvDepthMax); } if (FLAG_indvQualMin > 0) { ge->setGQmin(FLAG_indvQualMin); logger->info("Minimum GQ set to %d (or marked as missing genotype).", FLAG_indvQualMin); } // e.g. check colinearity and correlations between predictors dc.preRegressionCheck(phenotypeMatrix, covariate); // prepare PLINK files for BoltLMM model if (!FLAG_boltPlink.empty()) { if (dc.prepareBoltModel(FLAG_boltPlink, dataLoader.getPhenotype().getRowName(), dataLoader.getPhenotype())) { logger->error( "Failed to prepare inputs for BOLT-LMM association test model with " "this prefix [ %s ]!", FLAG_boltPlink.c_str()); exit(1); } } logger->info("Analysis started"); Result& buf = dc.getResult(); Matrix& genotype = dc.getOriginalGenotype(); // we have three modes: // * single variant reading, single variant test // * range variant reading, single variant test // * range variant reading, group variant test if (rangeMode == "Single" && singleVariantMode) { // use line by line mode buf.addHeader("CHROM"); buf.addHeader("POS"); if (FLAG_outputID) { buf.addHeader("ID"); } buf.addHeader("REF"); buf.addHeader("ALT"); buf.addHeader("N_INFORMATIVE"); // output headers for (size_t m = 0; m < model.size(); m++) { model[m]->writeHeader(fOuts[m], buf); } int variantProcessed = 0; while (true) { buf.clearValue(); int ret = ge->extractSingleGenotype(&genotype, &buf); if (ret == GenotypeExtractor::FILE_END) { // reach file end break; } if (ret == GenotypeExtractor::FAIL_FILTER) { continue; } if (ret != GenotypeExtractor::SUCCEED) { logger->error("Extract 
genotype failed at site: %s:%s!", buf["CHROM"].c_str(), buf["POS"].c_str()); continue; } if (genotype.cols == 0) { logger->warn("Extract [ %s:%s ] has 0 variants, skipping", buf["CHROM"].c_str(), buf["POS"].c_str()); continue; } ++variantProcessed; dc.consolidate(phenotypeMatrix, covariate, genotype); buf.updateValue("N_INFORMATIVE", toString(genotype.rows)); // logger->info("Test variant at site: %s:%s!", // buf["CHROM"].c_str(), buf["POS"].c_str()); // fit each model for (size_t m = 0; m != numModel; m++) { model[m]->reset(); model[m]->fit(&dc); model[m]->writeOutput(fOuts[m], buf); } } logger->info("Analyzed [ %d ] variants", variantProcessed); } else if (rangeMode != "Single" && singleVariantMode) { // read by gene/range model, single variant // test buf.addHeader(rangeMode); buf.addHeader("CHROM"); buf.addHeader("POS"); if (FLAG_outputID) { buf.addHeader("ID"); } buf.addHeader("REF"); buf.addHeader("ALT"); buf.addHeader("N_INFORMATIVE"); // output headers for (size_t m = 0; m < numModel; m++) { model[m]->writeHeader(fOuts[m], buf); } std::string geneName; RangeList rangeList; int variantProcessed = 0; for (size_t i = 0; i < geneRange.size(); ++i) { geneRange.at(i, &geneName, &rangeList); ge->setRange(rangeList); while (true) { buf.clearValue(); int ret = ge->extractSingleGenotype(&genotype, &buf); if (ret == GenotypeExtractor::FILE_END) { // reach end of this region break; } if (ret == GenotypeExtractor::FAIL_FILTER) { continue; } if (ret != GenotypeExtractor::SUCCEED) { logger->error("Extract genotype failed for gene %s!", geneName.c_str()); continue; } if (genotype.cols == 0) { logger->warn("Gene %s has 0 variants, skipping", geneName.c_str()); continue; } ++variantProcessed; dc.consolidate(phenotypeMatrix, covariate, genotype); buf.updateValue(rangeMode, geneName); buf.updateValue("N_INFORMATIVE", genotype.rows); // #pragma omp parallel for for (size_t m = 0; m != numModel; m++) { model[m]->reset(); model[m]->fit(&dc); model[m]->writeOutput(fOuts[m], buf); 
} } } logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions", variantProcessed, (int)geneRange.size()); } else if (rangeMode != "Single" && groupVariantMode) { // read by gene/range mode, group variant // test buf.addHeader(rangeMode); buf.addHeader("RANGE"); buf.addHeader("N_INFORMATIVE"); buf.addHeader("NumVar"); buf.addHeader("NumPolyVar"); // output headers for (size_t m = 0; m < numModel; m++) { model[m]->writeHeader(fOuts[m], buf); } std::string geneName; RangeList rangeList; int variantProcessed = 0; ge->enableAutoMerge(); for (size_t i = 0; i < geneRange.size(); ++i) { geneRange.at(i, &geneName, &rangeList); ge->setRange(rangeList); buf.clearValue(); int ret = ge->extractMultipleGenotype(&genotype); if (ret != GenotypeExtractor::SUCCEED) { logger->error("Extract genotype failed for gene %s!", geneName.c_str()); continue; } if (genotype.cols == 0) { logger->info("Gene %s has 0 variants, skipping", geneName.c_str()); continue; } variantProcessed += genotype.cols; // genotype is people by marker dc.consolidate(phenotypeMatrix, covariate, genotype); buf.updateValue(rangeMode, geneName); buf.updateValue("RANGE", rangeList.toString()); buf.updateValue("N_INFORMATIVE", genotype.rows); buf.updateValue("NumVar", genotype.cols); buf.updateValue("NumPolyVar", dc.getFlippedToMinorPolymorphicGenotype().cols); // #ifdef _OPENMP // #pragma omp parallel for // #endif for (size_t m = 0; m != numModel; m++) { model[m]->reset(); model[m]->fit(&dc); model[m]->writeOutput(fOuts[m], buf); } } logger->info("Analyzed [ %d ] variants from [ %d ] genes/regions", variantProcessed, (int)geneRange.size()); } else { logger->error( "Unsupported reading mode and test modes! 
(need more parameters?)"); exit(1); } // Resource cleaning up modelManager.close(); delete g_SummaryHeader; time_t endTime = time(0); logger->info("Analysis ends at: %s", currentTime().c_str()); int elapsedSecond = (int)(endTime - startTime); logger->info("Analysis took %d seconds", elapsedSecond); fputs("RVTESTS finished successfully\n", stdout); return 0; }