Exemplo n.º 1
0
static struct hash *makeCvHash()
/* Build the controlled-vocabulary lookup table.  Returns a hash keyed by
 * tag name ("assay", "control", "disease", ...) whose value for each tag is
 * whatever makeStringHash() produces from the list of words allowed for
 * that tag, so callers can quickly test whether a value is in the
 * vocabulary. */
{
/* The word lists below were code-generated and pasted in for now.  May do
 * something more elegant and prettier later. */
char *assay[] =
{
"ATAC-seq",
"broad-ChIP-seq",
"DNAse-seq",
"exome",
"Hi-C",
"long-RNA-seq",
"methyl-ChIP-seq",
"narrow-ChIP-seq",
"RIP-seq",
"short-RNA-seq",
"WGBS",
"WGS",
};
char *control[] =
{
"untreated",
"input",
"mock IP",
};
char *disease[] =
{
"DCM",
"HCM",
"LQT",
"TNM stage IIA, grade 3, ductal carcinoma",
"chronic myelogenous leukemia (CML)",
"acute promyelocytic leukemia",
};
char *enriched_in[] =
{
"coding",
"exon",
"genome",
"intron",
"open",
"promoter",
"unknown",
"utr",
"utr3",
"utr5",
};
char *formats[] =
{
"bam",
"bam.bai",
"bed",
"bigBed",
"bigWig",
"cram",
"fasta",
"fastq",
"gtf",
"html",
"idat",
"jpg",
"pdf",
"rcc",
"text",
"vcf",
"unknown",
};
char *sequencer[] =
{
"Illumina HiSeq",
"Illumina HiSeq 2000",
"Illumina HiSeq 2500",
"Illumina HiSeq 3000",
"Illumina HiSeq 4000",
"Illumina HiSeq X Five",
"Illumina HiSeq X Ten",
"Illumina MiSeq",
"Illumina MiSeq Dx",
"Illumina MiSeq FGx",
"Illumina NextSeq 500",
"Illumina unknown",
"PacBio RS II",
"Ion Torrent Ion Proton",
"Ion Torrent Ion PGM",
"Ion Torrent Ion Chef",
/* NOTE(review): the two 454 entries end in a space.  Kept as-is; confirm
 * whether incoming values are really expected to carry that space. */
"454 GS FLX+ ",
"454 GS Junior+ ",
"SN7001226",
"HiSeq G0821 SN605",
"HiSeq at Illumina 700422R",
"MiSeq G0823 M00361",
};
char *species[] =
{
"Homo sapiens",
"Mus musculus",
};
char *strain[] =
{
"C57BL/6",
"BALB/c",
"Sftpc-Cre-ER-T2A-rtta -/- teto-GFP-H2B +/-",
"Aqp5-Cre-ER +/- mtmg-tdTomato -/-",
};
char *target_epitope[] =
{
"H3K4Me1",
"H3K4Me3",
"H3K27Ac",
"H3K27Me3",
"5mC",
"5hmC",
};

/* One entry per tag: tag name -> string hash of that tag's allowed words. */
struct hash *hash = hashNew(0);
hashAdd(hash, "assay", makeStringHash(assay, ArraySize(assay)));
hashAdd(hash, "control", makeStringHash(control, ArraySize(control)));
hashAdd(hash, "disease", makeStringHash(disease, ArraySize(disease)));
hashAdd(hash, "enriched_in", makeStringHash(enriched_in, ArraySize(enriched_in)));
hashAdd(hash, "formats", makeStringHash(formats, ArraySize(formats)));
hashAdd(hash, "sequencer", makeStringHash(sequencer, ArraySize(sequencer)));
hashAdd(hash, "species", makeStringHash(species, ArraySize(species)));
hashAdd(hash, "strain", makeStringHash(strain, ArraySize(strain)));
hashAdd(hash, "target_epitope", makeStringHash(target_epitope, ArraySize(target_epitope)));
return hash;
}
Exemplo n.º 2
0
static struct hash *makeCvHash()
/* Build the controlled-vocabulary lookup table.  Returns a hash keyed by
 * tag name ("assay", "control_type", "format", ...) whose value for each
 * tag is whatever makeStringHash() produces from the list of words allowed
 * for that tag, so callers can quickly test whether a value is in the
 * vocabulary. */
{
/* The word lists below were code-generated and pasted in for now.  May do
 * something more elegant and prettier later. */
char *assay[] =
{
"ATAC-seq",
"broad-ChIP-seq",
"DNAse-seq",
"exome",
"Hi-C",
"long-RNA-seq",
"methyl-ChIP-seq",
"narrow-ChIP-seq",
"RIP-seq",
"RRBS",
"short-RNA-seq",
"WGBS",
"WGS",
};
char *control_type[] =
{
"untreated",
"input",
"mock IP",
};
char *biosample_source_health_status[] =
{
"acute promyelocytic leukemia",
"amyotrophic lateral sclerosis",
"chronic myelogenous leukemia (CML)",
"glioblastoma",
"DCM",
"HCM",
"LQT",
"prostate cancer",
"TNM stage IIA, grade 3, ductal carcinoma",
};
char *enriched_in[] =
{
"coding",
"exon",
"genome",
"intron",
"open",
"promoter",
"unknown",
"utr",
"utr3",
"utr5",
};
char *fluidics_chip[] =
{
"Fluidigm C1 5-10 um IFC",
"Fluidigm C1",
};
char *format[] =
{
"2bit",
"bam",
"bam.bai",
"bed",
"bigBed",
"bigWig",
"cram",
"csv",
"customTrack",
"expression_matrix",
"fasta",
"fastq",
"gtf",
"html",
"idat",
"jpg",
"kallisto_abundance",
"pdf",
"png",
"rcc",
"tsv",
"text",
"vcf",
"vcf.gz.tbi",
"vcf.idx",
"unknown",
};
char *assay_platform[] =
{
"Illumina HiSeq",
"Illumina HiSeq 2000",
"Illumina HiSeq 2500",
"Illumina HiSeq 3000",
"Illumina HiSeq 4000",
"Illumina HiSeq X Five",
"Illumina HiSeq X Ten",
"Illumina MiSeq",
"Illumina MiSeq Dx",
"Illumina MiSeq FGx",
"Illumina NextSeq 500",
"Illumina (unknown)",
"PacBio RS II",
"Ion Torrent Ion Proton",
"Ion Torrent Ion PGM",
"Ion Torrent Ion Chef",
/* NOTE(review): the two 454 entries end in a space.  Kept as-is; confirm
 * whether incoming values are really expected to carry that space. */
"454 GS FLX+ ",
"454 GS Junior+ ",
"SN7001226",
"HiSeq G0821 SN605",
"HiSeq at Illumina 700422R",
"MiSeq G0823 M00361",
};
char *species[] =
{
"Homo sapiens",
"Mus musculus",
};
char *strain[] =
{
"C57BL/6",
"BALB/c",
"Sftpc-Cre-ER-T2A-rtta -/- teto-GFP-H2B +/-",
"Aqp5-Cre-ER +/- mtmg-tdTomato -/-",
};
char *subcellular_localization[] =
{
"cytoplasm",
"monosome",
"polysome",
"light polysome",
"medium polysome",
"heavy polysome",
"membrane",
"insoluble",
"nucleus",
"n fraction",
};
char *immunoprecipitation_target[] =
{
"H3K4Me1",
"H3K4Me3",
"H3K27Ac",
"H3K27Me3",
"5mC",
"5hmC",
};

/* One entry per tag: tag name -> string hash of that tag's allowed words. */
struct hash *hash = hashNew(0);
hashAdd(hash, "assay", makeStringHash(assay, ArraySize(assay)));
hashAdd(hash, "control_type", makeStringHash(control_type, ArraySize(control_type)));
hashAdd(hash, "biosample_source_health_status", 
    makeStringHash(biosample_source_health_status, ArraySize(biosample_source_health_status)));
hashAdd(hash, "enriched_in", makeStringHash(enriched_in, ArraySize(enriched_in)));
hashAdd(hash, "format", makeStringHash(format, ArraySize(format)));
hashAdd(hash, "fluidics_chip", makeStringHash(fluidics_chip, ArraySize(fluidics_chip)));
hashAdd(hash, "assay_platform", makeStringHash(assay_platform, ArraySize(assay_platform)));
hashAdd(hash, "species", makeStringHash(species, ArraySize(species)));
hashAdd(hash, "strain", makeStringHash(strain, ArraySize(strain)));
hashAdd(hash, "subcellular_localization", 
    makeStringHash(subcellular_localization, ArraySize(subcellular_localization)));
hashAdd(hash, "immunoprecipitation_target", 
    makeStringHash(immunoprecipitation_target, ArraySize(immunoprecipitation_target)));
return hash;
}