Пример #1
0
// Returns the sum of numField( i ) over every index i from 0 up to (but not
// including) numLocalData().  Yields 0 when numLocalData() is 0.
unsigned int FieldElement::totNumLocalField() const 
{
	const unsigned int dataCount = numLocalData();
	unsigned int total = 0;
	unsigned int idx = 0;
	while ( idx < dataCount ) {
		total += numField( idx );
		++idx;
	}
	return total;
}
void liftAgp(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
    /* Lift up coordinates in .agp file.
     *
     * destFile    - path of the lifted .agp file to write.  The name component
     *               that splitPath extracts from it is written as the first
     *               column of every output row and is used to look up the
     *               inserts for this output.
     * liftHash    - lift specifications, consulted through findLift using the
     *               contig name in the first column of each input row.
     * sourceCount - number of entries in sources; usage() is called when it is
     *               less than 2.
     * sources     - sources[0] is the inserts file; sources[1..] are the .agp
     *               files to lift.  Sources that cannot be opened are skipped.
     *
     * Comment lines (first non-blank character '#') are passed through as is.
     * Each time the contig name changes, a gap row is written ahead of it when
     * chromInsertsGapSize reports a non-zero gap.  A contig name that shows up
     * again after other contigs, or a lift spec whose newName differs from the
     * output name, aborts via errAbort.  After all sources, the terminal insert
     * (if any) is appended after the largest end coordinate written. */
{
    FILE *dest = mustOpen(destFile, "w");
    struct hash *insertHash = newHash(8);
    struct hash *contigsHash = newHash(10);
    struct chromInserts *chromInserts;
    struct bigInsert *bi;
    struct lineFile *lf;
    struct liftSpec *spec;
    char newDir[256], newName[128], newExt[64];
    char lastContig[256];
    char *line, *words[32];
    char *source, *contig, *s;
    int lineSize, wordCount;
    int i;
    int start = 0;
    int end = 0;
    int ix = 0;
    int lastEnd = 0;
    boolean firstContig = TRUE;

    if (sourceCount < 2)
        usage();

    if (how == carryMissing)
        warn("'carry' doesn't work for .agp files, ignoring");

    splitPath(destFile, newDir, newName, newExt);

    /* Read in inserts file and process it. */
    chromInsertsRead(sources[0], insertHash);
    chromInserts = hashFindVal(insertHash, newName);

    lastContig[0] = '\0';
    for (i = 1; i < sourceCount; ++i)
        {
        source = sources[i];
        verbose(1, "Lifting %s\n", source);
        lf = lineFileMayOpen(source, TRUE);
        if (lf == NULL)
            continue;
        while (lineFileNext(lf, &line, &lineSize))
            {
            /* Check for comments and just pass them through. */
            s = skipLeadingSpaces(line);
            if (s[0] == '#')
                {
                fprintf(dest, "%s\n", line);
                continue;
                }

            /* Parse line, adjust offsets, write */
            wordCount = chopLine(line, words);
            if (wordCount != 8 && wordCount != 9)
                malformedAgp(lf);
            contig = words[0];

            /* Resolve the lift spec before writing anything for this row.
             * findLift is treated as able to return NULL when the contig has
             * no spec (presumably depending on the 'how' mode - confirm
             * against findLift).  Such rows are dropped as a whole, so no
             * gap row is written for a contig that is not lifted and
             * spec->offset is never read through a NULL pointer. */
            spec = findLift(liftHash, contig, lf);
            if (spec == NULL)
                continue;
            cantHandleSpecRevStrand(spec);

            if (!sameString(contig, lastContig))
                {
                char *gapType = "contig";
                char *ctg = rmChromPrefix(contig);
                int gapSize = chromInsertsGapSize(chromInserts, ctg, firstContig);

                /* Rows of one contig must be consecutive in the input. */
                if (hashLookup(contigsHash, contig))
                    errAbort("Contig repeated line %d of %s", lf->lineIx, lf->fileName);
                hashAdd(contigsHash, contig, NULL);
                if (gapSize != 0)
                    {
                    /* The gap starts right after the previous row's end. */
                    if ((bi = bigInsertBeforeContig(chromInserts, ctg)) != NULL)
                        gapType = bi->type;
                    fprintf(dest, "%s\t%d\t%d\t%d\tN\t%d\t%s\tno\n",
                            newName, end+1, end+gapSize, ++ix, gapSize, gapType);
                    }
                firstContig = FALSE;
                /* contig points into the line buffer, so keep a private copy;
                 * bounded so an over-long name cannot overrun lastContig. */
                safef(lastContig, sizeof(lastContig), "%s", contig);
                }

            start = numField(words, 1, 0, lf) + spec->offset;
            end = numField(words, 2, 0, lf) + spec->offset;
            if (end > lastEnd)
                lastEnd = end;
            if (!sameString(newName, spec->newName))
                errAbort("Mismatch in new name between %s and %s", newName, spec->newName);
            fprintf(dest, "%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s",
                    newName, start, end, ++ix,
                    words[4], words[5], words[6], words[7]);
            if (wordCount == 9)
                fprintf(dest, "\t%s", words[8]);
            fputc('\n', dest);
            }
        lineFileClose(&lf);
        if (dots)
            verbose(1, "\n");
        }

    /* Append the terminal insert, if there is one, after the last base written. */
    if (chromInserts != NULL && (bi = chromInserts->terminal) != NULL)
        {
        fprintf(dest, "%s\t%d\t%d\t%d\tN\t%d\t%s\tno\n",
                newName, lastEnd+1, lastEnd+bi->size, ++ix, bi->size, bi->type);
        }
    if (ferror(dest))
        errAbort("error writing %s", destFile);
    fclose(dest);
}
void liftOut(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift up coordinates in .out file.  Add offset to id (15th) column to 
 * maintain non-overlapping id ranges for different input files.
 *
 * destFile    - path of the lifted .out file to write.
 * liftHash    - lift specifications, consulted through findLift using the
 *               5th column of each input row.
 * sourceCount - number of entries in sources.
 * sources     - .out files to lift.  Files that do not exist or are empty
 *               produce a warning and are skipped; files whose first line
 *               starts with "There were no" are skipped silently.
 *
 * Rows whose sequence has no lift spec are written with their original name
 * and coordinates when 'how' is carryMissing, and dropped otherwise.  Rows
 * with fewer than 14 or more than 17 words, or whose 8th column does not
 * start with '(', abort via errAbort. */
{
FILE *dest = mustOpen(destFile, "w");
char *source;
int i;
struct lineFile *lf;
int lineSize, wordCount;
char *line, *words[32];
int begin, end, left;
char leftString[18];
int highestIdSoFar = 0, idOffset = 0;
char idStr[32];
struct liftSpec *spec;
char *newName;

rmskOutWriteHead(dest);
for (i=0; i<sourceCount; ++i)
    {
    source = sources[i];
    verbose(1, "Lifting %s\n", source);
    if (!fileExists(source))
        {
        warn("%s does not exist\n", source);
        continue;
        }
    lf = lineFileOpen(source, TRUE);
    if (!lineFileNext(lf, &line, &lineSize))
        {
        warn("%s is empty\n", source);
        lineFileClose(&lf);
        continue;
        }
    if (startsWith("There were no", line))
        {
        lineFileClose(&lf);
        continue;
        }
    /* One line has been read above; skip two more before the data rows. */
    skipLines(lf, 2);
    while (lineFileNext(lf, &line, &lineSize))
        {
        wordCount = chopLine(line, words);
        // 16 becomes 17, new field in RMasker June23 '03 - Hiram
        if (wordCount < 14 || wordCount > 17)
            errAbort("Expecting 14-17 words (found %d) line %d of %s", wordCount, lf->lineIx, lf->fileName);

        /* The id column stays empty unless a numeric id is present. */
        idStr[0] = '\0';
        if (wordCount >= 15)
            {
            if (words[14][0] == '*')
                warn("Warning: 15th column has * (should be a numeric ID).\n");
            else
                {
                /* Shift by the highest id emitted for earlier files. */
                int numId = sqlUnsigned(words[14]) + idOffset;
                if (numId > highestIdSoFar)
                    highestIdSoFar = numId;
                safef(idStr, sizeof(idStr), "%d", numId);
                }
            }

        begin = numField(words, 5, 0, lf);
        end = numField(words, 6, 0, lf);
        if (words[7][0] != '(')
            errAbort("Expecting parenthesized field 8 line %d of %s", lf->lineIx, lf->fileName);
        left = numField(words, 7, 1, lf);

        spec = findLift(liftHash, words[4], lf);
        if (spec == NULL) 
            {
            /* No lift spec: carry the row through unchanged, or drop it. */
            if (how != carryMissing)
                continue;
            newName = words[4];
            }
        else
            {
            cantHandleSpecRevStrand(spec);
            begin += spec->offset;
            end += spec->offset;
            /* Bases remaining after 'end' in the lifted sequence. */
            left = spec->newSize - end;
            newName = spec->newName;
            }

        /* Bounded formatting, matching how idStr is built above. */
        safef(leftString, sizeof(leftString), "(%d)", left);
        fprintf(dest, 
          "%5s %5s %4s %4s  %-9s %7d %7d %9s %1s  %-14s %-19s %6s %4s %6s %6s\n",
          words[0], words[1], words[2], words[3], newName,
          begin, end, leftString,
          words[8], words[9], words[10], words[11], words[12], words[13], idStr);
        }
    lineFileClose(&lf);
    /* Ids in the next file start above everything written so far. */
    idOffset = highestIdSoFar;
    }
if (ferror(dest))
    errAbort("error writing %s", destFile);
fclose(dest);
}
void liftGap(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
    /* Lift up coordinates in .gap file (just the gaps from .agp).  Negative
     * strand allowed.
     *
     * destFile    - path of the lifted .gap file to write.
     * liftHash    - lift specifications, consulted through findLift using the
     *               contig name in the first column of each input row.
     * sourceCount - number of entries in sources.
     * sources     - .gap files to lift.  Sources that cannot be opened are
     *               skipped.
     *
     * Comment lines (first non-blank character '#') are passed through as is.
     * Every other row must have 8 or 9 words and a 5th column starting with
     * 'N' or 'U'; anything else aborts via errAbort.  For a spec on the '-'
     * strand the start/end pair is mirrored within spec->oldSize before the
     * offset is added.  Output rows use spec->newName as the sequence name. */
{
    FILE *dest = mustOpen(destFile, "w");
    char *source;
    int i;
    struct lineFile *lf;
    int lineSize, wordCount;
    char *line, *words[32];
    char *s;
    struct liftSpec *spec;
    int start, end;
    int ix = 0;
    char newDir[256], newName[128], newExt[64];
    int fragStart, fragEnd;

    if (how == carryMissing)
        warn("'carry' doesn't work for .gap files, ignoring");

    /* NOTE(review): none of the split components are read below.  The call is
     * kept because splitPath's side effects are not visible from here -
     * confirm whether it can be removed. */
    splitPath(destFile, newDir, newName, newExt);

    for (i = 0; i < sourceCount; ++i)
        {
        source = sources[i];
        verbose(1, "Lifting %s\n", source);
        lf = lineFileMayOpen(source, TRUE);
        if (lf == NULL)
            continue;
        while (lineFileNext(lf, &line, &lineSize))
            {
            /* Check for comments and just pass them through. */
            s = skipLeadingSpaces(line);
            if (s[0] == '#')
                {
                fprintf(dest, "%s\n", line);
                continue;
                }

            /* Parse line, adjust offsets, write */
            wordCount = chopLine(line, words);
            if (wordCount != 8 && wordCount != 9)
                malformedAgp(lf);
            if (words[4][0] != 'N' && words[4][0] != 'U')
                errAbort("Found non-gap in .gap file: %s", words[4]);

            /* findLift is treated as able to return NULL when the contig has
             * no spec (presumably depending on the 'how' mode - confirm
             * against findLift).  Such rows are dropped instead of reading
             * spec->strand through a NULL pointer. */
            spec = findLift(liftHash, words[0], lf);
            if (spec == NULL)
                continue;

            fragStart = numField(words, 1, 0, lf);
            fragEnd = numField(words, 2, 0, lf);
            if (spec->strand == '-')
                {
                /* Mirror the interval within the old sequence. */
                start = spec->oldSize - fragEnd + 1;
                end = spec->oldSize - fragStart + 1;
                }
            else
                {
                start = fragStart;
                end = fragEnd;
                }
            start += spec->offset;
            end += spec->offset;

            fprintf(dest, "%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s",
                    spec->newName, start, end, ++ix,
                    words[4], words[5], words[6], words[7]);
            if (wordCount == 9)
                fprintf(dest, "\t%s", words[8]);
            fputc('\n', dest);
            }
        lineFileClose(&lf);
        if (dots)
            verbose(1, "\n");
        }
    if (ferror(dest))
        errAbort("error writing %s", destFile);
    fclose(dest);
}