// Prints, for one marker, the genotype lists held for every member of a
// family to stdout.
//
//   list    array of genotype lists; entry i is printed next to the person
//           found at family->path[i]
//   ped     pedigree used to look up person ids and the marker's allele labels
//   family  family whose members are listed (family->count entries are read)
//   marker  index of the marker whose allele labels are used
//
// Each member is written on its own line as "<pid> - a/b c/d ...", and a
// blank line follows the last member. An allele code of -1 is printed as "*".
void GenotypeList::Print(GenotypeList * list, Pedigree & ped, Family * family, int marker)
{
   MarkerInfo * labels = ped.GetMarkerInfo(marker);

   for (int member = 0; member < family->count; member++)
   {
      GenotypeList & candidates = list[member];

      printf("%s - ", (const char *) ped[family->path[member]].pid);

      for (int k = 0; k < candidates.allele1.Length(); k++)
      {
         // The label lookups are kept inside the printf calls so that any
         // temporary they produce is still alive while it is being printed
         printf("%s/", candidates.allele1[k] == -1 ? "*" :
                (const char *) labels->GetAlleleLabel(candidates.allele1[k]));

         printf("%s ", candidates.allele2[k] == -1 ? "*" :
                (const char *) labels->GetAlleleLabel(candidates.allele2[k]));
      }

      printf("\n");
   }

   printf("\n");
}
void DosageCalculator::OutputBasicMarkerInfo(FILE * output) { fprintf(output, "SNP\tAl1\tAl2\tFreq\tRsq_hat\n"); double scale = 1.0 / (genotypes + 1e-30); for (int marker = 0; marker < markers; marker++) { double sum = 0.0, sumsq = 0.0; for (int sample = 0; sample < genotypes; sample++) { double dose = GetDosage(sample, marker); sum += dose; sumsq += dose * dose; } sum *= scale; sumsq *= scale; double freq = sum * 0.50; double var1 = 2 * freq * (1.0 - freq); double var2 = max(sumsq - sum * sum, 0.0); MarkerInfo * info = Pedigree::GetMarkerInfo(marker); fprintf(output, "%s\t%s\t%s\t%.4f\t%.4f\n", (const char *) info->name, (const char *) info->GetAlleleLabel(1), info->CountAlleles() > 1 ? (const char *) info->GetAlleleLabel(2) : "-", freq > 0.50 ? 1.0 - freq : freq, var2 > var1 ? 1.0 : var2 / (var1 + 1e-30)); } }
void DosageCalculator::OutputMarkerInfo(FILE * output) { if (stored == 0) return; if (!storeDistribution) { OutputBasicMarkerInfo(output); return; } fprintf(output, "SNP\tAl1\tAl2\tFreq1\tMAF\tQuality\tRsq\n"); double scale_sg = 1.0 / (samples * genotypes + 1e-30); double scale_g = 1.0 / (genotypes + 1e-30); double scale_ss = 1.0 / (samples * samples + 1e-30); for (int marker = 0; marker < markers; marker++) { double p0 = 0.0, p1 = 0.0; double qc = 0.0, sumsq = 0.0; for (int sample = 0; sample < genotypes; sample++) { unsigned int n0, n1, n2; GetCounts(sample, marker, n0, n1, n2); p0 += n0; p1 += n1; qc += (n0 > n1 && n0 > n2) ? n0 : (n1 > n2) ? n1 : n2; sumsq += square(n0 + n1 * 0.5) * scale_ss; } p0 *= scale_sg; p1 *= scale_sg; qc *= scale_sg; sumsq *= scale_g; double freq = p0 + p1 * 0.50; double var1 = max(p0 + p1 * 0.25 - square(freq), 0); double var2 = max(sumsq - square(freq), 0); MarkerInfo * info = Pedigree::GetMarkerInfo(marker); fprintf(output, "%s\t%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\n", (const char *) info->name, (const char *) info->GetAlleleLabel(1), info->CountAlleles() > 1 ? (const char *) info->GetAlleleLabel(2) : "-", freq, freq > 0.50 ? 1.0 - freq : freq, qc, var2 / (var1 + 1e-30)); } }
bool Pedigree::SexLinkedCheck() { bool fail = false; // Keep track of what families fail the basic inheritance check, // so that we can run later run genotype elimination check on the remainder IntArray failedFamilies(familyCount); // For each marker ... for (int m = 0; m < markerCount; m++) { MarkerInfo * info = GetMarkerInfo(m); failedFamilies.Zero(); // Check for homozygous males for (int f = 0; f < familyCount; f++) for (int i = families[f]->first; i <= families[f]->last; i++) if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() && !persons[i]->markers[m].isHomozygous()) { printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n", (const char *) markerNames[m], (const char *) persons[i]->famid, (const char *) persons[i]->pid, (const char *) info->GetAlleleLabel(persons[i]->markers[m].one), (const char *) info->GetAlleleLabel(persons[i]->markers[m].two)); // Wipe this genotype so we don't get cascading errors below persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0; fail = true; failedFamilies[f] = true; } // Check full sibships for errors // TODO -- We could do better by grouping male half-sibs for (int f = 0; f < familyCount; f++) for (int i = families[f]->first; i <= families[f]->last; i++) if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i]) { // This loop runs once per sibship Alleles fat = persons[i]->father->markers[m]; Alleles mot = persons[i]->mother->markers[m]; bool fgeno = fat.isKnown(); bool mgeno = mot.isKnown(); Alleles inferred_mother = mot; Alleles first_sister; Alleles inferred_father; bool mother_ok = true; int sisters = 0; for (int j = 0; j < persons[i]->sibCount; j++) if (persons[i]->sibs[j]->isGenotyped(m)) { Alleles geno = persons[i]->sibs[j]->markers[m]; bool fat1 = fat.hasAllele(geno.one); bool fat2 = fat.hasAllele(geno.two); bool mot1 = mot.hasAllele(geno.one); bool mot2 = mot.hasAllele(geno.two); int sex = persons[i]->sibs[j]->sex; if (sex == SEX_MALE) { if (mgeno && !mot1) { printf("%s - Fam %s: 
Child %s [%s/Y] has mother [%s/%s]\n", (const char *) markerNames[m], (const char *) persons[i]->famid, (const char *) persons[i]->sibs[j]->pid, (const char *) info->GetAlleleLabel(geno.one), (const char *) info->GetAlleleLabel(mot.one), (const char *) info->GetAlleleLabel(mot.two)); fail = true; failedFamilies[f] = true; } else mother_ok &= inferred_mother.AddAllele(geno.one); } if (sex == SEX_FEMALE) { if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) || (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2))) { printf("%s - Fam %s: Child %s [%s/%s] has ", (const char *) markerNames[m], (const char *) persons[i]->famid, (const char *) persons[i]->sibs[j]->pid, (const char *) info->GetAlleleLabel(geno.one), (const char *) info->GetAlleleLabel(geno.two)); if (!fgeno) printf("mother [%s/%s]\n", (const char *) info->GetAlleleLabel(mot.one), (const char *) info->GetAlleleLabel(mot.two)); else if (!mgeno) printf("father [%s/Y]\n", (const char *) info->GetAlleleLabel(fat.one)); else printf("parents [%s/Y]*[%s/%s]\n", (const char *) info->GetAlleleLabel(fat.one), (const char *) info->GetAlleleLabel(mot.one), (const char *) info->GetAlleleLabel(mot.two)); fail = true; failedFamilies[f] = true; } else { if (!sisters++) inferred_father = first_sister = geno; else if (first_sister != geno) { inferred_father.Intersect(geno); mother_ok &= inferred_mother.AddAllele( geno.otherAllele(inferred_father.one)); mother_ok &= inferred_mother.AddAllele( first_sister.otherAllele(inferred_father.one)); } if (!fgeno && (mot1 ^ mot2)) inferred_father.Intersect(mot1 ? geno.two : geno.one); if (!mgeno && (fat1 ^ fat2)) mother_ok &= inferred_mother.AddAllele(fat1 ? 
geno.two : geno.one); } } } if (!mother_ok || (sisters && !inferred_father.isKnown())) { printf("%s - Fam %s: ", (const char *) markerNames[m], (const char *) persons[i]->famid); if (fgeno) printf("Father %s [%s/Y] has children [", (const char *) persons[i]->father->pid, (const char *) info->GetAlleleLabel(fat.one)); else if (mgeno) printf("Mother %s [%s/%s] has children [", (const char *) persons[i]->mother->pid, (const char *) info->GetAlleleLabel(mot.one), (const char *) info->GetAlleleLabel(mot.two)); else printf("Couple %s * %s has children [", (const char *) persons[i]->mother->pid, (const char *) persons[i]->father->pid); for (int j = 0; j < persons[i]->sibCount; j++) printf( persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s", j == 0 ? "" : " ", (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one), (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two)); printf("]\n"); fail = true; failedFamilies[f] = true; } } for (int f = 0; f < familyCount; f++) if (!failedFamilies[f] && (families[f]->count > families[f]->founders + 1) && !families[f]->isNuclear()) fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m); } if (fail) printf("\nMendelian inheritance errors detected\n"); return fail; }
bool Pedigree::AutosomalCheck() { // Arrays indicating which alleles and homozygotes occur IntArray haplos, genos, counts, failedFamilies; bool fail = false; // For each marker ... for (int m = 0; m < markerCount; m++) { MarkerInfo * info = GetMarkerInfo(m); // Summary for marker int alleleCount = CountAlleles(m); int genoCount = alleleCount * (alleleCount + 1) / 2; // Initialize arrays haplos.Dimension(alleleCount + 1); haplos.Set(-1); genos.Dimension(genoCount + 1); genos.Set(-1); failedFamilies.Dimension(familyCount); failedFamilies.Zero(); counts.Dimension(alleleCount + 1); for (int f = 0; f < familyCount; f++) for (int i = families[f]->first; i <= families[f]->last; i++) if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i]) { // This loop runs once per sibship Alleles fat = persons[i]->father->markers[m]; Alleles mot = persons[i]->mother->markers[m]; bool fgeno = fat.isKnown(); bool mgeno = mot.isKnown(); // Number of alleles, homozygotes and genotypes in this sibship int haplo = 0, h**o = 0, diplo = 0; // No. of different genotypes per allele counts.Zero(); // In general, there should be no more than 3 genotypes per allele bool too_many_genos = false; for (int j = 0; j < persons[i]->sibCount; j++) if (persons[i]->sibs[j]->isGenotyped(m)) { Alleles geno = persons[i]->sibs[j]->markers[m]; int fat1 = fat.hasAllele(geno.one); int fat2 = fat.hasAllele(geno.two); int mot1 = mot.hasAllele(geno.one); int mot2 = mot.hasAllele(geno.two); if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) || (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2))) { printf("%s - Fam %s: Child %s [%s/%s] has ", (const char *) markerNames[m], (const char *) persons[i]->sibs[j]->famid, (const char *) persons[i]->sibs[j]->pid, (const char *) info->GetAlleleLabel(geno.one), (const char *) info->GetAlleleLabel(geno.two)); if (!fgeno || !mgeno) printf("%s [%s/%s]\n", fgeno ? "father" : "mother", (const char *) info->GetAlleleLabel(fgeno ? 
fat.one : mot.one), (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two)); else printf("parents [%s/%s]*[%s/%s]\n", (const char *) info->GetAlleleLabel(fat.one), (const char *) info->GetAlleleLabel(fat.two), (const char *) info->GetAlleleLabel(mot.one), (const char *) info->GetAlleleLabel(mot.two)); fail = true; failedFamilies[f] = true; } else { if (haplos[geno.one] != i) { haplo++; haplos[geno.one] = i; }; if (haplos[geno.two] != i) { haplo++; haplos[geno.two] = i; }; int index = geno.SequenceCoded(); if (genos[index] != i) { genos[index] = i; diplo++; counts[geno.one]++; if (geno.isHomozygous()) h**o++; else counts[geno.two]++; if (counts[geno.one] > 2) too_many_genos = true; if (counts[geno.two] > 2) too_many_genos = true; } } } if (fgeno) { if (haplos[fat.one] != i) { haplo++; haplos[fat.one] = i; } if (haplos[fat.two] != i) { haplo++; haplos[fat.two] = i; } h**o += fat.isHomozygous(); } if (mgeno) { if (haplos[mot.one] != i) { haplo++; haplos[mot.one] = i; } if (haplos[mot.two] != i) { haplo++; haplos[mot.two] = i; } h**o += mot.isHomozygous(); } if (diplo > 4 || haplo + h**o > 4 || (haplo == 4 && too_many_genos)) { printf("%s - Fam %s: ", (const char *) markerNames[m], (const char *) persons[i]->famid); if (persons[i]->father->markers[m].isKnown()) printf("Father %s [%s/%s] has children [", (const char *) persons[i]->father->pid, (const char *) info->GetAlleleLabel(fat.one), (const char *) info->GetAlleleLabel(fat.two)); else if (persons[i]->mother->markers[m].isKnown()) printf("Mother %s [%s/%s] has children [", (const char *) persons[i]->mother->pid, (const char *) info->GetAlleleLabel(mot.one), (const char *) info->GetAlleleLabel(mot.two)); else printf("Couple %s * %s has children [", (const char *) persons[i]->mother->pid, (const char *) persons[i]->father->pid); for (int j = 0; j < persons[i]->sibCount; j++) printf("%s%s/%s", j == 0 ? 
"" : " ", (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one), (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two)); printf("]\n"); fail = true; failedFamilies[f] = true; } } for (int f = 0; f < familyCount; f++) if (!failedFamilies[f] && (families[f]->count > families[f]->founders + 1) && !families[f]->isNuclear()) fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m); } if (fail) printf("\nMendelian inheritance errors detected\n"); return fail; }