void spDbAddVarSplice(char *database, char *inFile, char *outDir) /* spDbAddVarSplice - This adds information on the varient splices to the sp/uniProt database. */ { struct sqlConnection *conn = sqlConnect(database); char query[256]; makeDir(outDir); FILE *varProtein = openToWrite(outDir, "varProtein.txt"); FILE *varAcc = openToWrite(outDir, "varAcc.txt"); FILE *varDisplayId = openToWrite(outDir, "varDisplayId.txt"); FILE *varAccToTaxon = openToWrite(outDir, "varAccToTaxon.txt"); FILE *varDescription = openToWrite(outDir, "varDescription.txt"); FILE *varGene = openToWrite(outDir, "varGene.txt"); FILE *varGeneLogic = openToWrite(outDir, "varGeneLogic.txt"); struct lineFile *lf = lineFileOpen(inFile, TRUE); aaSeq seq; ZeroVar(&seq); while (faPepSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { char *row[4]; char *name = seq.name; if (startsWith("sp|", name)) // Skip over sp| introduced Aug 2009 name += 3; int rowSize = chopString(name, "-|", row, ArraySize(row)); if (rowSize != 3) errAbort("Expecting name to be in format accession-N|DISP_ID, got %s\n", name); char *acc = row[0]; char *version = row[1]; char *displayId = row[2]; int accLen = strlen(acc); int verLen = strlen(version); int displayIdLen = strlen(displayId); /* Do some tests. */ if ((accLen != 6 && accLen != 10) || isdigit(acc[0]) || !isdigit(acc[accLen-1])) errAbort("wierd accession %s before line %d of %s", acc, lf->lineIx, lf->fileName); if (!isdigit(version[0]) || verLen > 4) errAbort("wierd version %s before line %d of %s", version, lf->lineIx, lf->fileName); if (countChars(displayId, '_') != 1 || displayIdLen < 6 || displayIdLen > 16) errAbort("wierd displayId %s before line %d of %s", displayId, lf->lineIx, lf->fileName); if (accLen + 1 + verLen >= sizeof(SpAcc)) errAbort("Need to increase size of SpAcc in spDb.h because of %s-%s - need %d characters but only have %lu", acc, version, accLen + 1 + verLen, sizeof(SpAcc)); /* Print out parsed results. */ fprintf(varAcc, "%s-%s\t%s\t%s\n", acc, version, acc, version); fprintf(varProtein, "%s-%s\t%s\n", acc, version, seq.dna); fprintf(varDisplayId, "%s-%s\t%s-%s\n", acc, version, acc, version); /* Look up taxon of base protein and use it to write to varAccToTaxon table. */ int taxon = spTaxon(conn, acc); fprintf(varAccToTaxon, "%s-%s\t%d\n", acc, version, taxon); /*Transfer description. */ char *description = spDescription(conn, acc); fprintf(varDescription, "%s-%s\t%s\n", acc, version, description); freez(&description); /* Transfer gene logic. */ sqlSafef(query, sizeof(query), "select val from geneLogic where acc = '%s'", acc); char *geneLogic = sqlQuickString(conn, query); if (geneLogic != NULL) fprintf(varGeneLogic, "%s-%s\t%s\n", acc, version, geneLogic); freez(&geneLogic); /* Transfer genes. */ struct slName *gene, *geneList = spGenes(conn, acc); for (gene = geneList; gene != NULL; gene = gene->next) fprintf(varGene, "%s-%s\t%s\n", acc, version, gene->name); slFreeList(&geneList); } carefulClose(&varAcc); carefulClose(&varProtein); carefulClose(&varDisplayId); carefulClose(&varAccToTaxon); carefulClose(&varDescription); carefulClose(&varGene); carefulClose(&varGeneLogic); sqlDisconnect(&conn); }
void spTest(char *database, char *someAcc) /* spTest - Test out sp library.. */ { struct sqlConnection *conn = sqlConnect(database); char *acc, *id, *binomial, *common; struct slName *geneList, *gene, *accList, *n, *list; struct slName *nameList, *name, *keyList, *key, *typeList, *type; struct spFeature *featList, *feat; struct spCitation *citeList, *cite; char *ret = NULL; int taxon; int classId = 0, typeId = 0, refId = 0; printf("input: %s\n", someAcc); acc = spLookupPrimaryAcc(conn, someAcc); printf("primary accession: %s\n", acc); id = spAccToId(conn, acc); printf("SwissProt id: %s\n", id); printf("acc from id: %s\n", spIdToAcc(conn, id)); ret = spOrganelle(conn, acc); printf("organelle: %s\n", (ret == NULL) ? "(null)" : ret); printf("isCurated: %d\n", spIsCurated(conn, acc)); printf("aaSize: %d\n", spAaSize(conn,acc)); printf("molWeight: %d\n", spMolWeight(conn,acc)); printf("createDate: %s\n", spCreateDate(conn,acc)); printf("seqDate: %s\n", spSeqDate(conn,acc)); printf("annDate: %s\n", spAnnDate(conn,acc)); printf("description: %s\n", spDescription(conn, acc)); taxon = spTaxon(conn, acc); printf("taxon: %d\n", taxon); binomial = spTaxonToBinomial(conn, taxon); printf("first scientific name: %s\n", binomial); common = spTaxonToCommon(conn, taxon); printf("first common name: %s\n", common); printf("taxon from sci: %d\n", spBinomialToTaxon(conn, binomial)); printf("taxon from common: %d\n", spCommonToTaxon(conn, common)); printf("all scientific names:"); nameList = spBinomialNames(conn, acc); for (name = nameList; name != NULL; name = name->next) printf(" %s,", name->name); printf("\n"); printf("gene(s):"); geneList = spGenes(conn,acc); for (gene=geneList; gene != NULL; gene = gene->next) printf(" %s,", gene->name); printf("\n"); for (gene=geneList; gene != NULL; gene = gene->next) { accList = spGeneToAccs(conn, gene->name, 0); printf(" any %s:", gene->name); for (n = accList; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); slFreeList(&accList); printf(" %s %s:", common, gene->name); accList = spGeneToAccs(conn, gene->name, taxon); for (n = accList; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); slFreeList(&accList); } slFreeList(&geneList); printf("keyword(s):"); keyList = spKeywords(conn, acc); for (key = keyList; key != NULL; key = key->next) printf(" %s,", key->name); printf("\n"); for (key = keyList; key != NULL; key = key->next) { accList = spKeywordSearch(conn, key->name, taxon); printPartialList(common, key->name, accList, 4); slFreeList(&accList); break; /* This is a little slow, once is enough. */ } for (key = keyList; key != NULL; key = key->next) { accList = spKeywordSearch(conn, key->name, 0); printPartialList("all", key->name, accList, 4); slFreeList(&accList); break; /* This is a little slow, once is enough. */ } slFreeList(&keyList); printf("All comments:\n"); list = slComments(conn, acc, NULL); for (n = list; n != NULL; n = n->next) printf(" %s\n", n->name); slFreeList(&list); typeList = slCommentTypes(conn); for (type = typeList; type != NULL; type = type->next) { list = slComments(conn, acc, type->name); if (list != NULL) { printf("%s comments:\n", type->name); for (n = list; n != NULL; n = n->next) printf(" %s\n", n->name); slFreeList(&list); } } slFreeList(&typeList); list = spEmblAccs(conn, acc); printf("GenBank/EMBL:"); for (n = list; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); if (list != NULL) printf("acc from %s: %s\n", list->name, spAccFromEmbl(conn, list->name)); slFreeList(&list); list = spPdbAccs(conn, acc); printf("PDB:"); for (n = list; n != NULL; n = n->next) printf(" %s,", n->name); printf("\n"); featList = spFeatures(conn, acc, 0, 0); printf("All features:\n"); for (feat = featList; feat != NULL; feat = feat->next) { printFeat(conn, feat); classId = feat->featureClass; typeId = feat->featureType; } slFreeList(&featList); if (classId != 0 && typeId != 0) { printf("%s class features:\n", spFeatureClassName(conn, classId)); featList = spFeatures(conn, acc, classId, 0); for (feat = featList; feat != NULL; feat = feat->next) printFeat(conn, feat); slFreeList(&featList); printf("%s type features:\n", spFeatureTypeName(conn, typeId)); featList = spFeatures(conn, acc, 0, typeId); for (feat = featList; feat != NULL; feat = feat->next) printFeat(conn, feat); slFreeList(&featList); printf("same class & type features:\n"); featList = spFeatures(conn, acc, classId, typeId); for (feat = featList; feat != NULL; feat = feat->next) printFeat(conn, feat); slFreeList(&featList); printf("class loop: %d->%s->%d\n", classId, spFeatureClassName(conn, classId), spFeatureClassId(conn, spFeatureClassName(conn, classId))); printf("type loop: %d->%s->%d\n", typeId, spFeatureTypeName(conn, typeId), spFeatureTypeId(conn, spFeatureTypeName(conn, typeId))); } citeList = spCitations(conn, acc); for (cite = citeList; cite != NULL; cite = cite->next) { refId = cite->reference; printf("title: %s\n", spRefTitle(conn, refId)); printf("authors:"); list = spRefAuthors(conn, refId); for (n = list; n != NULL; n = n->next) printf(" %s, ", n->name); printf("\n"); slFreeList(&list); printf("location: %s\n", spRefCite(conn, refId)); printf("pubMed: %s\n", spRefPubMed(conn, refId)); } if (refId != 0) { printf("other accs associated with last reference:\n\t"); list = spRefToAccs(conn, refId); printPartialList("", "", list, 6); slFreeList(&list); } sqlDisconnect(&conn); }