Example #1
0
void spDbAddVarSplice(char *database, char *inFile, char *outDir)
/* spDbAddVarSplice - This adds information on the variant splices to the sp/uniProt database.
 *   database - database to connect to for taxon, description, geneLogic and gene lookups.
 *   inFile   - peptide fasta file; each record name must look like accession-N|DISP_ID,
 *              optionally preceded by sp|.
 *   outDir   - directory (created via makeDir) that receives varProtein.txt, varAcc.txt,
 *              varDisplayId.txt, varAccToTaxon.txt, varDescription.txt, varGene.txt and
 *              varGeneLogic.txt, all tab-separated and keyed by accession-N.
 * Calls errAbort on any record whose name does not pass the sanity checks. */
{
struct sqlConnection *conn = sqlConnect(database);
char query[256];
makeDir(outDir);
FILE *varProtein = openToWrite(outDir, "varProtein.txt");
FILE *varAcc = openToWrite(outDir, "varAcc.txt");
FILE *varDisplayId = openToWrite(outDir, "varDisplayId.txt");
FILE *varAccToTaxon = openToWrite(outDir, "varAccToTaxon.txt");
FILE *varDescription = openToWrite(outDir, "varDescription.txt");
FILE *varGene = openToWrite(outDir, "varGene.txt");
FILE *varGeneLogic = openToWrite(outDir, "varGeneLogic.txt");
struct lineFile *lf = lineFileOpen(inFile, TRUE);
aaSeq seq;
ZeroVar(&seq);
while (faPepSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    char *row[4];
    char fullName[256];
    char *name = seq.name;
    if (startsWith("sp|", name))	// Skip over sp| introduced Aug 2009
        name += 3;

    /* chopString splits name into separate strings, so keep an intact (possibly
     * truncated) copy around purely for the error message below. */
    snprintf(fullName, sizeof(fullName), "%s", name);
    int rowSize = chopString(name, "-|", row, ArraySize(row));
    if (rowSize != 3)
        errAbort("Expecting name to be in format accession-N|DISP_ID, got %s\n", fullName);
    char *acc = row[0];
    char *version = row[1];
    char *displayId = row[2];
    int accLen = strlen(acc);
    int verLen = strlen(version);
    int displayIdLen = strlen(displayId);

    /* Do some tests.  The length test comes first so acc[accLen-1] is only read
     * for a 6 or 10 character accession.  Characters are cast to unsigned char
     * because isdigit is undefined for negative values. */
    if ((accLen != 6 && accLen != 10) || isdigit((unsigned char)acc[0])
        || !isdigit((unsigned char)acc[accLen-1]))
        errAbort("wierd accession %s before line %d of %s", acc, lf->lineIx, lf->fileName);
    if (!isdigit((unsigned char)version[0]) || verLen > 4)
        errAbort("wierd version %s before line %d of %s", version, lf->lineIx, lf->fileName);
    if (countChars(displayId, '_') != 1 || displayIdLen < 6 || displayIdLen > 16)
        errAbort("wierd displayId %s before line %d of %s", displayId, lf->lineIx, lf->fileName);
    /* acc + '-' + version plus a terminator has to fit in an SpAcc. */
    if (accLen + 1 + verLen >= sizeof(SpAcc))
        errAbort("Need to increase size of SpAcc in spDb.h because of %s-%s - need %d characters but only have %lu", acc, version, accLen + 1 + verLen, (unsigned long)sizeof(SpAcc));

    /* Print out parsed results. */
    fprintf(varAcc, "%s-%s\t%s\t%s\n", acc, version, acc, version);
    fprintf(varProtein, "%s-%s\t%s\n", acc, version, seq.dna);
    fprintf(varDisplayId, "%s-%s\t%s-%s\n", acc, version, acc, version);

    /* Look up taxon of base protein and use it to write to varAccToTaxon table. */
    int taxon = spTaxon(conn, acc);
    fprintf(varAccToTaxon, "%s-%s\t%d\n", acc, version, taxon);

    /* Transfer description. */
    char *description = spDescription(conn, acc);
    fprintf(varDescription, "%s-%s\t%s\n", acc, version, description);
    freez(&description);

    /* Transfer gene logic.  Not every accession has one, so a NULL result is skipped. */
    sqlSafef(query, sizeof(query), "select val from geneLogic where acc = '%s'", acc);
    char *geneLogic = sqlQuickString(conn, query);
    if (geneLogic != NULL)
        fprintf(varGeneLogic, "%s-%s\t%s\n", acc, version, geneLogic);
    freez(&geneLogic);

    /* Transfer genes, one output line per gene name of the base accession. */
    struct slName *gene, *geneList = spGenes(conn, acc);
    for (gene = geneList; gene != NULL; gene = gene->next)
        fprintf(varGene, "%s-%s\t%s\n", acc, version, gene->name);
    slFreeList(&geneList);

    }
/* Release the input file too; it was previously left open. */
lineFileClose(&lf);
carefulClose(&varAcc);
carefulClose(&varProtein);
carefulClose(&varDisplayId);
carefulClose(&varAccToTaxon);
carefulClose(&varDescription);
carefulClose(&varGene);
carefulClose(&varGeneLogic);
sqlDisconnect(&conn);
}
Example #2
0
void spTest(char *database, char *someAcc)
/* spTest - Test out sp library..
 *   database - database to connect to.
 *   someAcc  - accession to start from; it is first mapped to a primary accession
 *              and every later lookup uses that primary accession.
 * Prints the result of each library call to stdout.  The lists fetched along the
 * way are released with slFreeList once they have been printed. */
{
struct sqlConnection *conn = sqlConnect(database);
char *acc, *id, *binomial, *common;
struct slName *geneList, *gene, *accList, *n, *list;
struct slName *nameList, *name, *keyList, *key, *typeList, *type;
struct spFeature *featList, *feat;
struct spCitation *citeList, *cite;
char *ret = NULL;
int taxon;
int classId = 0, typeId = 0, refId = 0;

/* Basic identity and single-valued fields. */
printf("input: %s\n", someAcc);
acc = spLookupPrimaryAcc(conn, someAcc);
printf("primary accession: %s\n", acc);
id = spAccToId(conn, acc);
printf("SwissProt id: %s\n", id);
printf("acc from id: %s\n", spIdToAcc(conn, id));
ret = spOrganelle(conn, acc);
printf("organelle: %s\n", (ret == NULL) ? "(null)" : ret);
printf("isCurated: %d\n", spIsCurated(conn, acc));
printf("aaSize: %d\n", spAaSize(conn,acc));
printf("molWeight: %d\n", spMolWeight(conn,acc));
printf("createDate: %s\n", spCreateDate(conn,acc));
printf("seqDate: %s\n", spSeqDate(conn,acc));
printf("annDate: %s\n", spAnnDate(conn,acc));
printf("description: %s\n", spDescription(conn, acc));

/* Taxon and its names, including the round trip back from name to taxon. */
taxon = spTaxon(conn, acc);
printf("taxon: %d\n", taxon);
binomial = spTaxonToBinomial(conn, taxon);
printf("first scientific name: %s\n", binomial);
common = spTaxonToCommon(conn, taxon);
printf("first common name: %s\n", common);
printf("taxon from sci: %d\n", spBinomialToTaxon(conn, binomial));
printf("taxon from common: %d\n", spCommonToTaxon(conn, common));
printf("all scientific names:");
nameList = spBinomialNames(conn, acc);
for (name = nameList; name != NULL; name = name->next)
    printf(" %s,", name->name);
printf("\n");
slFreeList(&nameList);	/* Was previously leaked. */

/* Genes, then for each gene the accessions in any taxon (0) and in this taxon. */
printf("gene(s):");
geneList = spGenes(conn,acc);
for (gene=geneList; gene != NULL; gene = gene->next)
    printf(" %s,", gene->name);
printf("\n");
for (gene=geneList; gene != NULL; gene = gene->next)
    {
    accList = spGeneToAccs(conn, gene->name, 0);
    printf(" any %s:", gene->name);
    for (n = accList; n != NULL; n = n->next)
        printf(" %s,", n->name);
    printf("\n");
    slFreeList(&accList);
    printf(" %s %s:", common, gene->name);
    accList = spGeneToAccs(conn, gene->name, taxon);
    for (n = accList; n != NULL; n = n->next)
        printf(" %s,", n->name);
    printf("\n");
    slFreeList(&accList);
    }
slFreeList(&geneList);

/* Keywords; the keyword searches are only run for the first keyword. */
printf("keyword(s):");
keyList = spKeywords(conn, acc);
for (key = keyList; key != NULL; key = key->next)
    printf(" %s,", key->name);
printf("\n");
for (key = keyList; key != NULL; key = key->next)
    {
    accList = spKeywordSearch(conn, key->name, taxon);
    printPartialList(common, key->name, accList, 4);
    slFreeList(&accList);
    break;	/* This is a little slow, once is enough. */
    }
for (key = keyList; key != NULL; key = key->next)
    {
    accList = spKeywordSearch(conn, key->name, 0);
    printPartialList("all", key->name, accList, 4);
    slFreeList(&accList);
    break;	/* This is a little slow, once is enough. */
    }
slFreeList(&keyList);

/* Comments: first all of them, then grouped by comment type. */
printf("All comments:\n");
list = slComments(conn, acc, NULL);
for (n = list; n != NULL; n = n->next)
    printf(" %s\n", n->name);
slFreeList(&list);

typeList = slCommentTypes(conn);
for (type = typeList; type != NULL; type = type->next)
    {
    list = slComments(conn, acc, type->name);
    if (list != NULL)
        {
        printf("%s comments:\n", type->name);
        for (n = list; n != NULL; n = n->next)
            printf(" %s\n", n->name);
        slFreeList(&list);
        }
    }
slFreeList(&typeList);

/* Cross references to other databases. */
list = spEmblAccs(conn, acc);
printf("GenBank/EMBL:");
for (n = list; n != NULL; n = n->next)
    printf(" %s,", n->name);
printf("\n");
if (list != NULL)
    printf("acc from %s: %s\n",
        list->name, spAccFromEmbl(conn, list->name));
slFreeList(&list);

list = spPdbAccs(conn, acc);
printf("PDB:");
for (n = list; n != NULL; n = n->next)
    printf(" %s,", n->name);
printf("\n");
slFreeList(&list);	/* Was previously leaked. */

/* Features.  classId/typeId end up holding the class and type of the last
 * feature printed, and drive the filtered queries below. */
featList = spFeatures(conn, acc, 0, 0);
printf("All features:\n");
for (feat = featList; feat != NULL; feat = feat->next)
    {
    printFeat(conn, feat);
    classId = feat->featureClass;
    typeId = feat->featureType;
    }
slFreeList(&featList);
if (classId != 0 && typeId != 0)
    {
    printf("%s class features:\n", spFeatureClassName(conn, classId));
    featList = spFeatures(conn, acc, classId, 0);
    for (feat = featList; feat != NULL; feat = feat->next)
        printFeat(conn, feat);
    slFreeList(&featList);
    printf("%s type features:\n", spFeatureTypeName(conn, typeId));
    featList = spFeatures(conn, acc, 0, typeId);
    for (feat = featList; feat != NULL; feat = feat->next)
        printFeat(conn, feat);
    slFreeList(&featList);
    printf("same class & type features:\n");
    featList = spFeatures(conn, acc, classId, typeId);
    for (feat = featList; feat != NULL; feat = feat->next)
        printFeat(conn, feat);
    slFreeList(&featList);
    /* Id -> name -> id round trips. */
    printf("class loop: %d->%s->%d\n", classId,
        spFeatureClassName(conn, classId),
        spFeatureClassId(conn, spFeatureClassName(conn, classId)));
    printf("type loop: %d->%s->%d\n", typeId,
        spFeatureTypeName(conn, typeId),
        spFeatureTypeId(conn, spFeatureTypeName(conn, typeId)));
    }

/* Citations.  refId ends up holding the reference of the last citation. */
citeList = spCitations(conn, acc);
for (cite = citeList; cite != NULL; cite = cite->next)
    {
    refId = cite->reference;
    printf("title: %s\n", spRefTitle(conn, refId));
    printf("authors:");
    list = spRefAuthors(conn, refId);
    for (n = list; n != NULL; n = n->next)
        printf(" %s, ", n->name);
    printf("\n");
    slFreeList(&list);
    printf("location: %s\n", spRefCite(conn, refId));
    printf("pubMed: %s\n", spRefPubMed(conn, refId));
    }
/* Was previously leaked; released the same way as the feature lists. */
slFreeList(&citeList);
if (refId != 0)
    {
    printf("other accs associated with last reference:\n\t");
    list = spRefToAccs(conn, refId);
    printPartialList("", "", list, 6);
    slFreeList(&list);
    }
sqlDisconnect(&conn);
}