コード例 #1
0
ファイル: pslSort.c プロジェクト: kenongit/sequencing
struct psl *nextPsl(struct lineFile *lf)
/* Fetch the next line of lf, split it on tabs and parse it as an
 * alignment record: 21 fields go through pslLoad, 23 fields through
 * pslxLoad.  Returns NULL when lineFileNext reports no more lines, and
 * also (after a warning) when the line has any other field count, so the
 * return value alone does not distinguish those two cases. */
{
char *text;
int textSize;
char *fields[32];

if (!lineFileNext(lf, &text, &textSize))
    return NULL;

switch (chopTabs(text, fields))
    {
    case 21:
        return pslLoad(fields);
    case 23:
        return pslxLoad(fields);
    default:
        warn("Bad line %d of %s", lf->lineIx, lf->fileName);
        return NULL;
    }
}
コード例 #2
0
static boolean mrnaDescriptionsExists(struct section *section, 
	struct sqlConnection *conn, char *geneId)
/* Return TRUE if at least one all_mrna alignment overlaps the current gene
 * (curGeneChrom:curGeneStart-curGeneEnd) on the strand of curGenePred.
 * When the all_mrna table exists, the loaded alignments are stored in
 * section->items in query order, even if the list is empty.  geneId is
 * not used. */
{
struct psl *list = NULL;
if (hTableExists(sqlGetDatabase(conn), "all_mrna"))
    {
    struct sqlResult *sr;
    char **row;
    struct psl *psl;
    int rowOffset;
    char extra[64];
    safef(extra, sizeof(extra), "strand='%c'", curGenePred->strand[0]);
    sr = hRangeQuery(conn, "all_mrna", curGeneChrom, curGeneStart, curGeneEnd,
    	extra, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
         {
	 psl = pslLoad(row+rowOffset);
	 slAddHead(&list, psl);
	 }
    /* Release the query result; the original left it open. */
    sqlFreeResult(&sr);
    slReverse(&list);
    section->items = list;
    }
return list != NULL;
}
コード例 #3
0
static struct chromAnn* chromAnnPslReaderRead(struct chromAnnReader *car)
/* Read the next PSL row from the reader stored in car->data and turn it
 * into a chromAnn.  Returns NULL when rowReaderNext has no more rows.
 * car->opts selects whether the query or target side supplies the
 * annotation name and strand, whether the raw columns are kept, and
 * whether a single whole-range block or the psl's own blocks are added. */
{
struct rowReader *reader = car->data;
if (!rowReaderNext(reader))
    return NULL;
rowReaderExpectAtLeast(reader, PSL_NUM_COLS);

/* Only clone the raw columns when the caller asked to keep them. */
char **savedCols = NULL;
if (car->opts & chromAnnSaveLines)
    savedCols = rowReaderCloneColumns(reader);

struct psl *aln = pslLoad(reader->row);
int querySide = (car->opts & chromAnnUseQSide) != 0;
struct chromAnn *ann;
if (querySide)
    ann = chromAnnNew(aln->qName, getPslQSideStrand(aln), aln->tName, savedCols,
                      strVectorWrite, strVectorFree);
else
    ann = chromAnnNew(aln->tName, getPslTSideStrand(aln), aln->qName, savedCols,
                      strVectorWrite, strVectorFree);

if (!(car->opts & chromAnnRange))
    addPslBlocks(ann, car->opts, aln);
else if (querySide)
    chromAnnBlkNew(ann, aln->qStart, aln->qEnd);
else
    chromAnnBlkNew(ann, aln->tStart, aln->tEnd);

chromAnnFinish(ann);
pslFree(&aln);
return ann;
}
コード例 #4
0
void musAliAt(char *database, char *chrom, char *humanFa, char *mouseFa)
/* musAliAt - Write to mouseFa, in fasta format, each distinct query
 * sequence that has a blatMouse alignment on target chrom in database.
 * humanFa is accepted but not used by this function. */
{
char query[256], **row;
struct sqlResult *sr;
struct sqlConnection *conn;
struct hash *seenHash = newHash(10);
FILE *musOut = mustOpen(mouseFa, "w");

hSetDb(database);
conn = hAllocConn();
sqlSafef(query, sizeof query, "select * from blatMouse where tName = '%s'", chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct psl *psl = pslLoad(row);
    /* Names are stored with a NULL value, so presence must be tested with
     * hashLookup.  The original used hashFindVal, which returns that NULL
     * value, so every sequence looked unseen and was written again. */
    if (hashLookup(seenHash, psl->qName) == NULL)
        {
	struct dnaSeq *musSeq = hExtSeq(psl->qName);
	hashAdd(seenHash, psl->qName, NULL);
	faWriteNext(musOut, musSeq->name, musSeq->dna, musSeq->size);
	freeDnaSeq(&musSeq);
	}
    pslFree(&psl);
    }
/* Release everything acquired above; the original leaked all of it. */
sqlFreeResult(&sr);
hFreeConn(&conn);
freeHash(&seenHash);
carefulClose(&musOut);
}
コード例 #5
0
ファイル: altSplice.c プロジェクト: apmagalhaes/kentUtils
struct psl *loadPslsFromDb(struct sqlConnection *conn, int numTables, char **tables, 
			   char *chrom, unsigned int chromStart, unsigned int chromEnd)
/* Run a range query for chrom:chromStart-chromEnd against each of the
 * numTables tables and return every row as a psl, in query order.  When
 * the global weightMrna is set, rows from tables whose name contains
 * "refSeqAli" or "mrna" are followed by a clone of themselves. */
{
struct psl *loaded = NULL;
int binOffset = -100;
int tableIx;

for (tableIx = 0; tableIx < numTables; ++tableIx)
    {
    char *table = tables[tableIx];
    struct sqlResult *result = hRangeQuery(conn, table, chrom, chromStart, chromEnd,
					   NULL, &binOffset);
    char **fields;
    for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
	{
	struct psl *aln = pslLoad(fields + binOffset);
	slSafeAddHead(&loaded, aln);
	if (!weightMrna)
	    continue;
	if (stringIn("refSeqAli", table) || stringIn("mrna", table))
	    {
	    struct psl *copy = clonePsl(aln);
	    slSafeAddHead(&loaded, copy);
	    }
	}
    sqlFreeResult(&result);
    }
slReverse(&loaded);
return loaded;
}
コード例 #6
0
ファイル: pslPairs.c プロジェクト: blumroy/kentUtils
void readPslFile(struct lineFile *pf)
/* Process all records in a psl file of mRNA alignments.  Each record must
 * have exactly 21 tab-separated fields.  A record whose query name is in
 * leftNames or rightNames and which passes the TINSERT/NORANDOM filter is
 * wrapped with createPslAli and pushed onto end1 (left) or end2 (right)
 * of its clone.  All other records are freed. */
{
 int lineSize;
 char *line;
 char *words[32];
 int  wordCount;
 struct psl *psl;
 struct clone *clone;
 struct pslAli *pa = NULL;
 struct cloneName *cloneName;
 int isLeft;
 
 while (lineFileNext(pf, &line, &lineSize))
   {
     wordCount = chopTabs(line, words);
     if (wordCount != 21)
       errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName);
     psl = pslLoad(words);
     /* Remember which hash matched so the lookup is not repeated below. */
     isLeft = (hashLookup(leftNames, psl->qName) != NULL);
     if (isLeft)
       cloneName = hashMustFindVal(leftNames, psl->qName);
     else if (hashLookup(rightNames, psl->qName))
       cloneName = hashMustFindVal(rightNames, psl->qName);
     else
       {
	 /* Not one of our reads: nothing keeps the record, so free it. */
	 pslFree(&psl);
	 continue;
       }
     clone = hashMustFindVal(clones, cloneName->name);
     if ((psl->tBaseInsert < TINSERT) && ((!NORANDOM) || (strlen(psl->tName) < 7))) 
       {
	 /* NOTE(review): createPslAli is given the psl here, so it is not
	  * freed on this path - confirm it retains the pointer. */
	 pa = createPslAli(psl);
	 if (isLeft)
	   slAddHead(&(clone->end1), pa);
	 else
	   slAddHead(&(clone->end2), pa);
       }
     else
       {
	 /* Filtered out: no pslAli was built from it, so release it. */
	 pslFree(&psl);
       }
   }
}
コード例 #7
0
struct psl *nextPsl(struct lineFile *lf)
/* Read the next line of lf, split it with chopLine and parse it as a
 * psl.  Returns NULL when lineFileNext reports no more lines.  A line
 * that does not split into exactly 21 words is reported through
 * errAbort. */
{
char *text;
int textSize;
char *fields[32];
int fieldCount;

if (!lineFileNext(lf, &text, &textSize))
    return NULL;

fieldCount = chopLine(text, fields);
if (fieldCount != 21)
    {
    errAbort("Bad line %d of %s, %d words expecting %d", lf->lineIx, lf->fileName, fieldCount, 21);
    return NULL;
    }
return pslLoad(fields);
}
コード例 #8
0
void convertPslFileRow(struct sqlConnection *conn, char **row, FILE *genePredFh)
/* A row from the PSL file, getting CDS */
{
    struct psl *psl = pslLoad(row);
    struct  genbankCds cds = getCds(conn, psl);
    convertPsl(psl, &cds, genePredFh);
    pslFree(&psl);
}
コード例 #9
0
void convertPslTableRow(char **row, FILE *genePredFh)
/* A row from the PSL query that includes CDS */
{
    struct psl *psl = pslLoad(row+1);
    struct  genbankCds cds;
    genbankCdsParse(row[0], &cds);
    convertPsl(psl, &cds, genePredFh);
    pslFree(&psl);
}
コード例 #10
0
ファイル: pslReader.c プロジェクト: blumroy/kentUtils
static struct psl *queryNext(struct pslReader* pr)
/* Return the next record of the reader's SQL result, parsed with pslxLoad
 * when pr->isPslx is set and pslLoad otherwise, skipping pr->rowOffset
 * leading columns.  Returns NULL when the result has no more rows. */
{
char **fields = sqlNextRow(pr->sr);
if (fields == NULL)
    return NULL;
fields += pr->rowOffset;
return pr->isPslx ? pslxLoad(fields) : pslLoad(fields);
}
コード例 #11
0
ファイル: pslReps.c プロジェクト: blumroy/kentUtils
void pslReps(char *inName, char *bestAliName, char *repName)
/* Read the alignments in inName, group consecutive records that share a
 * query name, and hand each group to doOneAcc together with the
 * bestAliName and repName output files.  Accepts 21-column (psl) and
 * 23-column (pslx) lines; any other column count aborts. */
{
struct lineFile *in = pslFileOpen(inName);
FILE *bestFile = mustOpen(bestAliName, "w");
FILE *repFile = mustOpen(repName, "w");
int textSize;
char *text;
char *fields[32];
struct psl *group = NULL;
char groupName[512];
int aliCount = 0;

/* Progress output is suppressed when either output is stdout. */
quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout");
if (coverQSizeFile != NULL)
    loadCoverQSizes(coverQSizeFile);

if (!quiet)
    printf("Processing %s to %s and %s\n", inName, bestAliName, repName);
if (!noHead)
    pslWriteHead(bestFile);

groupName[0] = '\0';
while (lineFileNext(in, &text, &textSize))
    {
    struct psl *aln = NULL;

    aliCount += 1;
    if (!quiet && (aliCount & 0x1ffff) == 0)
        {
	printf(".");
	fflush(stdout);
	}
    switch (chopTabs(text, fields))
	{
	case 21:
	    aln = pslLoad(fields);
	    break;
	case 23:
	    aln = pslxLoad(fields);
	    break;
	default:
	    errAbort("Bad line %d of %s\n", in->lineIx, in->fileName);
	}
    /* A new query name closes the current group. */
    if (!sameString(groupName, aln->qName))
	{
	doOneAcc(groupName, group, bestFile, repFile);
	pslFreeList(&group);
	safef(groupName, sizeof(groupName), "%s", aln->qName);
	}
    slAddHead(&group, aln);
    }
/* Flush the final group. */
doOneAcc(groupName, group, bestFile, repFile);
pslFreeList(&group);
lineFileClose(&in);
fclose(bestFile);
fclose(repFile);
if (!quiet)
    printf("Processed %d alignments\n", aliCount);
}
コード例 #12
0
void bestProbeOverlap(struct sqlConnection *conn, char *probeTable, 
	struct genePred *gpList, struct hash *gpToProbeHash)
/* For each gene in gpList, find the same-strand probe alignment in
 * probeTable with the largest pslRangeTreeOverlap against the gene, and
 * add gene name -> probe qName to gpToProbeHash.  Requires at least
 * 100 bases of overlap.  The qName strings stored in gpToProbeHash belong
 * to the loaded psls, so the psls and the keeper hash holding them are
 * intentionally not freed here. */
{
/* Create a hash of binKeepers filled with probes. */
struct hash *keeperHash = keepersForChroms(conn);
struct hashCookie it = hashFirst(keeperHash);
struct hashEl *hel;
int pslCount = 0;
while ((hel = hashNext(&it)) != NULL)
    {
    char *chrom = hel->name;
    struct binKeeper *bk = hel->val;
    int rowOffset;
    struct sqlResult *sr = hChromQuery(conn, probeTable, chrom, NULL, &rowOffset);
    char **row;
    while ((row = sqlNextRow(sr)) != NULL)
        {
	struct psl *psl = pslLoad(row+rowOffset);
	binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
	++pslCount;
	}
    sqlFreeResult(&sr);
    }
verbose(2, "Loaded %d psls from %s\n", pslCount, probeTable);

/* Loop through gene list, finding best probe if any for each gene. */
struct genePred *gp;
for (gp = gpList; gp != NULL; gp = gp->next)
    {
    struct rbTree *rangeTree = genePredToRangeTree(gp, FALSE);
    struct psl *bestPsl = NULL;
    int bestOverlap = 99;	/* MinOverlap - 1 */
    struct binKeeper *bk = hashMustFindVal(keeperHash, gp->chrom);
    struct binElement *bin, *binList = binKeeperFind(bk, gp->txStart, gp->txEnd);
    for (bin = binList; bin != NULL; bin = bin->next)
        {
	struct psl *psl = bin->val;
	if (psl->strand[0] == gp->strand[0])
	    {
	    int overlap = pslRangeTreeOverlap(psl, rangeTree);
	    if (overlap > bestOverlap)
		{
		bestOverlap = overlap;
		bestPsl = psl;
		}
	    }
	}
    if (bestPsl != NULL)
        hashAdd(gpToProbeHash, gp->name, bestPsl->qName);
    /* Per-gene scratch structures: the original leaked both on every
     * iteration.  Only the list nodes are freed; the psls they point to
     * stay owned by the binKeeper. */
    slFreeList(&binList);
    rangeTreeFree(&rangeTree);
    }
}
コード例 #13
0
ファイル: pslFilterPrimers.c プロジェクト: blumroy/kentUtils
void processPrimers(struct lineFile *pf, FILE *of)
/* Read and process isPCR file and sts locations.  Each line of pf must be
 * a 21-column psl whose query name has at least two '_'-separated parts;
 * the second part is the key looked up in stsHash.  Consecutive records
 * with the same key are collected as places on that sts, and the sts is
 * passed to filterPrimersAndWrite when the key changes and once more at
 * the end of the file. */
{
int lineSize, wordCount;
char *line;
char *words[21];
char *dbsts_name, *dbsts[4], *currDbsts;
struct sts *sts=NULL;
struct psl *psl;
struct place *place;

/* Start from a heap-allocated empty key so it can always be freed. */
currDbsts = cloneString("");
while (lineFileNext(pf, &line, &lineSize))
    {
    wordCount = chopTabs(line, words);
    if (wordCount != 21)
	errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName);
    psl = pslLoad(words);
    dbsts_name = cloneString(psl->qName);
    wordCount = chopByChar(dbsts_name, '_', dbsts, ArraySize(dbsts));
    /* dbsts[1] is read below; without this check it would be
     * uninitialized when the name contains no '_'. */
    if (wordCount < 2)
	errAbort("Bad query name %s on line %d of %s\n", psl->qName,
		 pf->lineIx, pf->fileName);
    if (differentString(dbsts[1], currDbsts))
      {
	if (sts != NULL)
	  {
	    filterPrimersAndWrite(of, sts);
	    /* stsFree(&sts); */
	  }
	/* Free the previous key on every change; the original freed it only
	 * when an sts had matched, leaking it otherwise. */
	freez(&currDbsts);
	currDbsts = cloneString(dbsts[1]);
	sts = NULL;
	if (hashLookup(stsHash, dbsts[1]))
	  sts = hashMustFindVal(stsHash, dbsts[1]);
      }
    if (sts)
      {
	AllocVar(place);
	/* Check if this psl record is already present */
	/* NOTE(review): place was just allocated, so place->psl is an empty
	 * list here; this test may have been meant to look at sts->place. */
	if (!pslInList(place->psl, psl))
	  {
	    slAddHead(&place->psl, psl);
	    place->unali = calcUnali(sts, psl);
	    place->sizeDiff = calcSizeDiff(sts, psl);
	    place->badBits = calcBadBits(place);
	    if (place->sizeDiff < (200 - (place->badBits * 50)))
	      slAddHead(&sts->place, place);
	    else
	      placeFree(&place);
	  }
      }
    else
      {
	/* No sts for this record, so nothing keeps the psl. */
	pslFree(&psl);
      }
    /* dbsts[] points into dbsts_name; it is no longer needed past here. */
    freeMem(dbsts_name);
    }
 if (sts != NULL)
   filterPrimersAndWrite(of, sts);
 freez(&currDbsts);
}
コード例 #14
0
static void processPslFile(struct sqlConnection *conn, struct gbSelect* select,
                           struct gbStatusTbl* statusTbl, char* pslPath)
/* Open pslPath with gzLineFileOpen and pass every PSL_NUM_COLS-column
 * row, parsed as a psl, to processPsl.  Each psl is freed after
 * processPsl returns. */
{
struct lineFile *pslLf = gzLineFileOpen(pslPath);
char* fields[PSL_NUM_COLS];

for (;;)
    {
    struct psl *aln;
    if (!lineFileNextRow(pslLf, fields, PSL_NUM_COLS))
        break;
    aln = pslLoad(fields);
    processPsl(conn, select, statusTbl, aln, pslLf);
    pslFree(&aln);
    }
gzLineFileClose(&pslLf);
}
コード例 #15
0
static void getAligns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Select every row of refSeqAli, parse it as a psl (skipping the columns
 * reported by hOffsetPastBin) and pass it to processPsl together with
 * refSeqVerInfoTbl and a file opened for writing at outFile. */
{
int binCols = hOffsetPastBin(sqlGetDatabase(conn), NULL, "refSeqAli");
struct sqlResult *result = sqlGetResult(conn, "SELECT * FROM refSeqAli");
FILE *out = mustOpen(outFile, "w");
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct psl *aln = pslLoad(fields + binCols);
    processPsl(out, refSeqVerInfoTbl, aln);
    pslFree(&aln);
    }
carefulClose(&out);
sqlFreeResult(&result);
}
コード例 #16
0
ファイル: pubsTracks.c プロジェクト: maximilianh/kent
static void pubsPslLoadItems(struct track *tg)
/* Load only the psl items of a single article into tg->items, sorted with
 * linkedFeaturesCmp.  The article id is read from the cart variable
 * PUBSFILTERNAME; nothing is loaded when it is absent, empty, or not a
 * plain decimal number.  Also replaces tg->longLabel with a label naming
 * the article. */
{
// get articleId to filter on
char *articleId = cartOptionalString(cart, PUBSFILTERNAME);
if (articleId==NULL)
    return;

// The id comes from the cart and is pasted unquoted into the where clause
// below, so accept decimal digits only to keep other SQL out of the query.
char *digit;
if (articleId[0] == '\0')
    return;
for (digit = articleId; *digit != '\0'; digit++)
    {
    if (*digit < '0' || *digit > '9')
        return;
    }

struct sqlConnection *conn = hAllocConn(database);
char *dispLabel = pubsArticleDispId(tg, conn, articleId);
struct hash *idToSnip = pubsLookupSequences(tg, conn, articleId, TRUE);
struct hash *idToSeq = pubsLookupSequences(tg, conn, articleId, FALSE);

// change track label 
char *oldLabel = tg->longLabel;
tg->longLabel = catTwoStrings("Individual matches for article ", dispLabel);
freeMem(oldLabel);

// filter and load items for this articleId
char where[256];
safef(where, sizeof(where), " articleId=%s ", articleId);

int rowOffset = 0;
struct sqlResult *sr = NULL;
sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, where, &rowOffset);

struct linkedFeatures *lfList = NULL;
char **row = NULL;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct psl *psl = pslLoad(row+rowOffset);
    // Build the item into a local first so slAddHead gets a plain variable.
    // NOTE(review): slAddHead may be a macro that evaluates its node
    // argument more than once - confirm in common.h.
    struct linkedFeatures *lf = lfFromPsl(psl, TRUE);
    slAddHead(&lfList, lf);
    char *shortSeq  = hashFindVal(idToSeq,  lf->name);
    char *snip = hashFindVal(idToSnip, lf->name);
    // extra carries the mouse-over snippet and the short sequence label;
    // both strings stay owned by the lookup hashes.
    struct pubsExtra *extra = needMem(sizeof(struct pubsExtra));
    extra->mouseOver=snip;
    extra->label=shortSeq;
    lf->extra = extra;
    }
sqlFreeResult(&sr);
slReverse(&lfList);
slSort(&lfList, linkedFeaturesCmp);
tg->items = lfList;
hFreeConn(&conn);
}
コード例 #17
0
ファイル: pslReader.c プロジェクト: blumroy/kentUtils
static struct psl *fileNext(struct pslReader* pr)
/* Return the next record from the reader's file, or NULL when
 * lineFileChopNextTab yields no more rows.  Every row read is checked for
 * the column count matching pr->isPslx.  When pr->chrom is set, rows whose
 * column 13 differs from it are skipped. */
{
char *fields[PSLX_NUM_COLS];
int fieldCount;

for (;;)
    {
    fieldCount = lineFileChopNextTab(pr->lf, fields, PSLX_NUM_COLS);
    if (fieldCount <= 0)
        return NULL;
    lineFileExpectWords(pr->lf, (pr->isPslx ? PSLX_NUM_COLS : PSL_NUM_COLS), fieldCount);
    if ((pr->chrom != NULL) && !sameString(fields[13], pr->chrom))
        continue;
    return pr->isPslx ? pslxLoad(fields) : pslLoad(fields);
    }
}
コード例 #18
0
ファイル: orthoMap.c プロジェクト: ucscGenomeBrowser/kent
struct psl *loadPslFromTable(struct sqlConnection *conn, char *table,
			     char *chrom, int chromStart, int chromEnd)
/** Return, in query order, a list of every psl that hRangeQuery finds in
    table for chrom:chromStart-chromEnd. */
{
int binOffset = -100;
struct psl *loaded = NULL;
struct sqlResult *result = hRangeQuery(conn, table, chrom, chromStart, chromEnd,
				       NULL, &binOffset);
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct psl *aln = pslLoad(fields + binOffset);
    slSafeAddHead(&loaded, aln);
    }
sqlFreeResult(&result);
slReverse(&loaded);
return loaded;
}
コード例 #19
0
void cloneAliPosTab(char *fileName, struct hash *cloneHash)
/* Compute, for each clone with alignments in chr18_frags, the smallest
 * tStart and largest tEnd over its fragments, then sort the positions
 * with cmpClonePos and write them to fileName via writePosList. */
{
char query[256];
struct clonePos *posList = NULL;
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;

sprintf(query, "select * from chr18_frags");
sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct psl *aln = pslLoad(row);
    struct cloneInfo *info;
    struct clonePos *pos;

    /* Rewrites the fragment name in place before the clone lookup. */
    fragNameToCloneName(aln->qName);
    info = findClone(cloneHash, aln->qName);
    pos = info->aliPos;
    if (pos != NULL)
	{
	/* Clone already has a span: widen it to include this alignment. */
	if (aln->tStart < pos->start)
	    pos->start = aln->tStart;
	if (aln->tEnd > pos->end)
	    pos->end = aln->tEnd;
	}
    else
	{
	/* First alignment for this clone: start a new span. */
	AllocVar(pos);
	pos->info = info;
	info->aliPos = pos;
	pos->start = aln->tStart;
	pos->end = aln->tEnd;
	slAddHead(&posList, pos);
	}
    pslFree(&aln);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
slSort(&posList, cmpClonePos);
writePosList(fileName, posList, "chr18");
}
コード例 #20
0
struct psl *pslLoadByQuery(struct sqlConnection *conn, char *query)
/* Run query and return every result row as a psl, in result order.
 * The query is expected to select whole psl rows, e.g.
 * 'select * from example where something=something' or
 * 'select example.* from example, anotherTable where example.something =
 * anotherTable.something'.  Any columns beyond 21 are taken to precede
 * the psl fields and are skipped.
 * Dispose of the result with pslFreeList(). */
{
struct sqlResult *result = sqlGetResult(conn, query);
int skip = sqlCountColumns(result) - 21;
struct psl *loaded = NULL;
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct psl *aln = pslLoad(fields + skip);
    slAddHead(&loaded, aln);
    }
slReverse(&loaded);
sqlFreeResult(&result);
return loaded;
}
コード例 #21
0
ファイル: retroClick.c プロジェクト: maximilianh/kent
static struct psl *loadAlign(struct sqlConnection *conn, struct mappingInfo *mi, int start)
/* Load the psl for mi->pg->name that starts at tStart == start from the
 * "<tblPre><geneSet>Ali[<suffix>]" table.  The alignment must exist:
 * aborts with a message when the query returns no row.  Only the first
 * matching row is used. */
{
char rootTable[256], table[256], query[256];
boolean hasBin;
struct sqlResult *sr;
char **row;
struct psl *psl;

if (mi->suffix == NULL)
    safef(rootTable, sizeof(rootTable), "%s%sAli", mi->tblPre, mi->geneSet);
else
    safef(rootTable, sizeof(rootTable), "%s%sAli%s", mi->tblPre, mi->geneSet,mi->suffix);
hFindSplitTable(database, seqName, rootTable, table, &hasBin);

sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tStart = %d",
      table, mi->pg->name, start);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);
/* The original passed a NULL row straight to pslLoad when nothing matched. */
if (row == NULL)
    errAbort("no alignment for %s with tStart %d in table %s",
             mi->pg->name, start, table);
psl = pslLoad(row+hasBin);
sqlFreeResult(&sr);
return psl;
}
コード例 #22
0
ファイル: altSplice.c プロジェクト: apmagalhaes/kentUtils
void loadPslsFromDatabase(struct sqlConnection *conn, char *db, char *chrom) 
/** Load every alignment on chrom from the numDbTables tables named in
    dbTables, tracking the overall minPslStart/maxPslEnd, then create
    chromPslBin and agxSeenBin over that span and add each loaded psl to
    chromPslBin.  When weightMrna is set, rows from tables whose name
    contains "refSeqAli" or "mrna" are cloned so they are added twice.
    db is not used. */
{
struct psl *allPsls = NULL, *cur = NULL;
int binOffset = 0;
int tableIx;

for (tableIx = 0; tableIx < numDbTables; ++tableIx)
    {
    struct sqlResult *result = hChromQuery(conn, dbTables[tableIx], chrom, NULL, &binOffset); 
    char **fields;
    for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
	{
	cur = pslLoad(fields + binOffset);
	slAddHead(&allPsls, cur);
	minPslStart = min(cur->tStart, minPslStart);
	maxPslEnd = max(cur->tEnd, maxPslEnd);
	/* Duplicating the record is a cheap way of giving certain tables
	   more weight. */
	if (!weightMrna)
	    continue;
	if (stringIn("refSeqAli", dbTables[tableIx]) || stringIn("mrna", dbTables[tableIx]))
	    {
	    cur = clonePsl(cur);
	    slAddHead(&allPsls, cur);
	    }
	}
    sqlFreeResult(&result);
    }

chromPslBin = binKeeperNew(minPslStart, maxPslEnd);
agxSeenBin = binKeeperNew(minPslStart, maxPslEnd);
cur = allPsls;
while (cur != NULL)
    {
    binKeeperAdd(chromPslBin, cur->tStart, cur->tEnd, cur);
    cur = cur->next;
    }
}
コード例 #23
0
ファイル: retroClick.c プロジェクト: maximilianh/kent
static struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd)
/* Return, in query order, the psls in table (resolved through
 * hFindSplitTable) with the given qName and tName whose target span
 * overlaps tStart..tEnd.  Uses its own connection, released before
 * returning. */
{
char splitTable[64];
char query[256];
boolean hasBin;
struct psl *found = NULL;
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *result = NULL;
char **fields;

hFindSplitTable(database, seqName, table, splitTable, &hasBin);
sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tName = '%s' and tEnd > %d and tStart < %d", splitTable, qName, tName, tStart, tEnd);
result = sqlGetResult(conn, query);
for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct psl *aln = pslLoad(fields + hasBin);
    slAddHead(&found, aln);
    }
sqlFreeResult(&result);
slReverse(&found);
hFreeConn(&conn);
return found;
}
コード例 #24
0
void somePsls(char *database, char *table, char *inList, char *outPsl)
/* somePsls - Get some psls from database.  For each query name listed
 * one per line in inList, write every row of table with that qName to
 * outPsl in tab-separated psl format. */
{
char *words[1], **row;
FILE *f = mustOpen(outPsl, "w");
struct lineFile *lf = lineFileOpen(inList, TRUE);
char query[256];
struct psl *psl;
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
while (lineFileRow(lf, words))
    {
    sqlSafef(query, sizeof query, "select * from %s where qName = '%s'", table, words[0]);
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
	/* NOTE(review): row+1 assumes the table has one leading column
	 * (presumably bin) before the psl fields - confirm for the tables
	 * this is run against. */
	psl = pslLoad(row+1);
	pslTabOut(psl, f);
	pslFree(&psl);
	}
    sqlFreeResult(&sr);
    }
/* Release everything opened above; the original closed none of it. */
sqlDisconnect(&conn);
lineFileClose(&lf);
carefulClose(&f);
}
コード例 #25
0
/* Version for Zoo species */
boolean convertCoordinatesZoo(FILE *goodOut, FILE *badOut, 
			void (*goodResult)(FILE *out, struct coordConvRep *report),
			void (*badResult)(FILE *out, struct coordConvRep *report)) 
/* Try to convert the region chrom:chromStart-chromEnd of origGenome to
 * newGenome using the alignments in the "<origGenome>_<newGenome>" track,
 * and print a report through the function pointers provided: goodResult
 * on goodOut when the conversion succeeds, badResult on badOut otherwise.
 * In general goodResult and badResult generate either html or text,
 * depending on whether we are in cgi or testing mode.
 * Returns TRUE when the region was converted.  The conversion is rejected
 * when consecutive alignments switch between inverted and non-inverted
 * order ("incoherent") or when the gap between them on the new genome is
 * more than 10 times the gap on the original and more than 10000 bases. */
{
struct coordConvRep *ccr = createCoordConvRep_mod();
struct dbDb *newDbRec = NULL, *oldDbRec = NULL;
struct sqlConnection *conn = sqlConnect(origGenome);
struct sqlResult *sr = NULL;

boolean success = FALSE;

/* Keeps track if we're in an inverted match or not */
boolean inversion = FALSE;

/* Two possible reasons to fail */
boolean incoherent = FALSE;
boolean max_apart= FALSE;

char track[256];
char success_message[256];
char **row;
int rowOffset = 0;
int conv_total=0;
int iteration = 0;

/* End of the previous alignment on the original genome (ref_end) and
   start/end of the previous alignment on the new genome (comp_*).  They
   are used to measure how far apart consecutive blocks are before and
   after conversion. */

int ref_end=0,comp_end=0,comp_start=0;

/* Load info from databases into ccr */
oldDbRec = loadDbInformation_mod(origGenome);
ccr->from->chrom = cloneString(chrom);
ccr->from->chromStart = chromStart;
ccr->from->chromEnd = chromEnd;
ccr->from->version = cloneString(oldDbRec->name);
ccr->from->date = cloneString(oldDbRec->description);
ccr->from->nibDir = cloneString(oldDbRec->nibPath);
ccr->seqSize=1000;
newDbRec = loadDbInformation_mod(newGenome);
ccr->to->version = cloneString(newDbRec->name);
ccr->to->date = cloneString(newDbRec->description);
ccr->to->nibDir = cloneString(newDbRec->nibPath);
ccr->good=FALSE;

/* Create the correct track name...  Will have to be changed when multiple versions? */
/* safef bounds the write; the original used an unchecked sprintf. */

safef(track, sizeof(track), "%s_%s",origGenome,newGenome);

/* Get the information from loading the track. */
/* Double check we are not using a track connecting 1 and 2 */

if(!(strstr(track,"2") && strstr(track,"1")))
    {
    sr = hRangeQuery(conn, track, chrom, chromStart, chromEnd, NULL, &rowOffset);
    }

/* NOTE(review): sr is still NULL here when the track name contains both
   "1" and "2"; this relies on sqlNextRow accepting a NULL result. */
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Find the corresponding track */
    struct psl *psl = pslLoad(row+rowOffset);
    
    /* If first time through... */
    if(iteration==0)
	{
	/* Fill in stuff if first time through... */
	ccr->to->chrom=cloneString(psl->qName);
	ccr->to->chromStart=psl->qStart;
	
	/* Actual point of conversion of coordinates */
	ccr->from->next->chromStart=psl->tStart;      
	ccr->good=TRUE;
	
	success=TRUE;
	}
    
    /* check for erroneous conversion if not first time through */
    /* Check for inversions, massive insertions... */
    
    /* Check for inversion (old start is "bigger" than new start)*/	
    
    if(iteration > 0)
	{
	if((comp_start> psl->qStart))
	    {
	    /* If not currently in an inversion state */
	    if(!inversion )
		/* If not the second time through (first time inversion could be detected) */
		if(iteration > 2)
		    incoherent=TRUE;
	    
	    /* Set inversion state variable to true */
	    inversion = TRUE;
	    
	    /* Check to see if there are too great distances ... */
	    
	    if( ((comp_start - psl->qEnd)>(10 * (psl->tStart - ref_end))) && ((comp_start - psl->qEnd) > 10000))
		max_apart=TRUE;
	    }
	else 
	    /* No inversion */
	    {
	    /* Check if previous state was an inversion (then flip flop)...*/
	    if(inversion)
		incoherent = TRUE;
	    else
		{
		/* Check to see if the mapping is too far apart */
		if( ((psl->qStart - comp_end) > (10 * (psl->tStart - ref_end))) && ((psl->qStart - comp_end) > 10000))
		    max_apart=TRUE;
		}
	    }
	}
    
    if(inversion)
	{
	if(iteration == 1)
	    ccr->to->chromEnd=comp_end;
	
	ccr->to->chromStart=psl->qStart;
	}
    else
	ccr->to->chromEnd=psl->qEnd;
    
    ccr->from->next->chromEnd=psl->tEnd;
    
    if(max_apart || incoherent)
	{
	success=FALSE;
	/* Free the current record before leaving; the original leaked it. */
	pslFree(&psl);
	break;
	}
    
    /* Count only the part of this block not already covered by the
       previous one. */
    if(psl->tStart > ref_end)
	conv_total+=(psl->tEnd - psl->tStart);
    else
	conv_total+=(psl->tEnd - ref_end);
    
    ref_end=psl->tEnd;
    comp_end=psl->qEnd;
    comp_start=psl->qStart;
        
    iteration++;
    pslFree(&psl);
    }

/* Done with the database: release the result and the connection, neither
   of which the original released. */
sqlFreeResult(&sr);
sqlDisconnect(&conn);
		    
if(!success)
    {
    /* Check to see if using version two of zoo.  Not integrated into the database at this stage... */
    if((strstr(origGenome,"2") && strstr(newGenome,"1"))|| (strstr(newGenome,"2") && strstr(origGenome,"1")))
	sprintf(success_message,"Couldn't convert between these two genomes since the cross conversion between the two zoo dataset hasn't been fully integrated into the database");
    else if (max_apart)
	sprintf(success_message, "Coordinates couldn't reliably be converted between the two species.  Try using a smaller window. ");
    else if (incoherent)
	sprintf(success_message, "Coordinates couldn't be converted due to inconsistent inversions.");
    else
	sprintf(success_message,"Couldn't find a corresponding region for the original genome to the new genome.");
    
    ccr->msg=cloneString(success_message);
    badResult(badOut,ccr);
    }
else
    {
    sprintf(success_message,"Successfully converted (%3.1f%% of the original region was converted.)",((float)(conv_total * 100))/(float)(chromEnd-chromStart));
    ccr->msg=cloneString(success_message);
    goodResult(goodOut,ccr);
    }

dbDbFree(&oldDbRec);
dbDbFree(&newDbRec);
coordConvRepFreeList(&ccr); 
return success;
}
コード例 #26
0
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack,
              struct hash *otherHash, struct stats *stats)
/* Process one chromosome.  Loads the regions on chrom from bedTrack (a
 * 3-column file when the name ends in ".bed", otherwise a database
 * table) and the alignments on chrom from refAliTrack.  For each region,
 * every overlapping alignment is trimmed to the region with
 * pslTrimToTargetRange and folded into stats via foldPslIntoStats.
 * stats->bedCount and stats->bedBaseCount are updated per region. */
{
    struct bed *bedList = NULL, *bed;
    struct sqlConnection *conn = hAllocConn(database);
    struct sqlResult *sr;
    char **row;
    int rowOffset;
    int chromSize = hChromSize(database, chrom);
    struct binKeeper *bk = binKeeperNew(0, chromSize);
    struct psl *pslList = NULL;
    struct dnaSeq *chromSeq = NULL;

    if (endsWith(bedTrack, ".bed"))
    {
        struct lineFile *lf = lineFileOpen(bedTrack, TRUE);
        /* Named differently from the outer row so it no longer shadows it. */
        char *bedRow[3];
        while (lineFileRow(lf, bedRow))
        {
            if (sameString(chrom, bedRow[0]))
            {
                bed = bedLoad3(bedRow);
                slAddHead(&bedList, bed);
            }
        }
        lineFileClose(&lf);
    }
    else
    {
        sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset);
        while ((row = sqlNextRow(sr)) != NULL)
        {
            bed = bedLoad3(row+rowOffset);
            slAddHead(&bedList, bed);
        }
        sqlFreeResult(&sr);
    }
    slReverse(&bedList);
    uglyf("Loaded beds\n");

    sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        struct psl *psl = pslLoad(row + rowOffset);
        slAddHead(&pslList, psl);
        binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
    }
    sqlFreeResult(&sr);
    uglyf("Loaded psls\n");

    chromSeq = hLoadChrom(database, chrom);
    /* Fetch entire chromosome into memory. */
    uglyf("Loaded human seq\n");

    for (bed = bedList; bed != NULL; bed = bed->next)
    {
        struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd);
        for (el = list; el != NULL; el = el->next)
        {
            struct psl *fullPsl = el->val;
            /* The trimmed copy is owned here; fullPsl stays in pslList. */
            struct psl *psl = pslTrimToTargetRange(fullPsl,
                                                   bed->chromStart, bed->chromEnd);
            if (psl != NULL)
            {
                foldPslIntoStats(psl, chromSeq, otherHash, stats);
                pslFree(&psl);
            }
        }
        slFreeList(&list);
        stats->bedCount += 1;
        stats->bedBaseCount += bed->chromEnd - bed->chromStart;
        /* The original called sqlFreeResult(&sr) here on a result that had
         * already been freed above; that stray call is removed. */
    }
    freeDnaSeq(&chromSeq);
    pslFreeList(&pslList);
    /* The bed list was never released in the original. */
    bedFreeList(&bedList);
    binKeeperFree(&bk);
    hFreeConn(&conn);
}
コード例 #27
0
ファイル: chkAlignTbls.c プロジェクト: davidhoover/kent
static void chkPslTable(struct gbSelect* select, struct sqlConnection* conn,
                        char* rootTable, char* chrom,
                        struct metaDataTbls* metaDataTbls,
                        unsigned typeFlags)
/* Run chkPsl on every row of a PSL table of mrna/est to genome
 * alignments, passing the row index, database, table name, metaDataTbls
 * and typeFlags.  The table is "<chrom>_<rootTable>" when chrom is
 * given and rootTable itself when chrom is NULL.  A missing
 * chromosome-specific table is silently accepted; a missing non-chrom
 * table is a warning in testMode and a gbError otherwise.  Rows are
 * restricted to select->accPrefix when that is set. */
{
struct hTableInfo* tableInfo;
char table[64];
char accWhere[64];
char query[512];

/* The chrom table name is built explicitly: there is an mrna table that
 * is not psl, so looking up root name "mrna" with a chrom that doesn't
 * exist would return the mrna table instead of null. */
if (chrom == NULL)
    safef(table, sizeof(table), "%s", rootTable);
else
    safef(table, sizeof(table), "%s_%s", chrom, rootTable);

gbVerbEnter(3, "chkPslTable %s", table);

tableInfo = hFindTableInfo(select->release->genome->database, chrom, table);
if (tableInfo != NULL)
    {
    unsigned rowOffset = (tableInfo->hasBin) ? 1 : 0;
    unsigned iRow = 0;
    struct sqlResult *sr;
    char **row;

    // FIXME: might be better as sqlDyString
    accWhere[0] = '\0';
    if (select->accPrefix != NULL)
        sqlSafefFrag(accWhere, sizeof(accWhere), " WHERE qName LIKE '%s%%'",
              select->accPrefix);
    sqlSafef(query, sizeof(query), "SELECT * FROM %s%-s", table, accWhere);
    sr = sqlGetResult(conn, query);
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        struct psl* psl = pslLoad(row+rowOffset);
        chkPsl(psl, iRow, select->release->genome->database, table,
               metaDataTbls, typeFlags);
        pslFree(&psl);
        iRow++;
        }
    sqlFreeResult(&sr);
    }
else if (chrom == NULL)
    {
    /* Only the non-chrom table is required to exist. */
    if (testMode)
        fprintf(stderr, "Warning: no psl table %s.%s\n",
                select->release->genome->database, table);
    else
        gbError("no psl table %s.%s", select->release->genome->database,
                table);
    }
gbVerbLeave(3, "chkPslTable %s", table);
}