/*
 * FetchOnlyBioseqFromID
 *
 * Downloads the record named by str and returns the Bioseq inside it that
 * matches the identifier.
 *
 *   str - identifier text; it is trimmed in place by TrimSpacesAroundString.
 *         An all-digit string is treated as a GI number, anything else is
 *         passed to SeqIdFromAccessionDotVersion / AccessionToGi.
 *
 * Returns the Bioseq located by BioseqFindInSeqEntry in the downloaded
 * Seq-entry, or NULL when the identifier does not resolve to a positive uid,
 * the query returns nothing, or the temporary GI Seq-id cannot be allocated.
 *
 * When debug_mode is set and the whole operation took more than one second,
 * the elapsed time is printed to stdout.
 */
static BioseqPtr FetchOnlyBioseqFromID (CharPtr str)

{
  BioseqPtr    bsp = NULL;
  SeqEntryPtr  sep = NULL;
  SeqIdPtr     sip = NULL;
  Int4         uid = 0;
  time_t       t1, t2;

  t1 = time (NULL);

  TrimSpacesAroundString (str);

  if (IsAllDigits (str)) {
    if (PvtStrToLong (str, &uid)) {
      sip = ValNodeNew (NULL);
      if (sip != NULL) {
        sip->choice = SEQID_GI;
        sip->data.intvalue = uid;
      } else {
        /* allocation failed: without a Seq-id there is nothing to look up,
           so skip the download instead of dereferencing NULL */
        uid = 0;
      }
    } else {
      uid = 0;
    }
  } else {
    sip = SeqIdFromAccessionDotVersion (str);
    uid = AccessionToGi (str);
  }

  if (uid > 0) {
    sep = PubSeqSynchronousQuery (uid, 3, 0); /* retcode was 0 */
    if (sep != NULL) {
      bsp = BioseqFindInSeqEntry (sip, sep);
      /* The returned entity ID is not used here.  The call is retained
         because the original code made it; NOTE(review): presumably it
         registers sep with the object manager - confirm before removing. */
      (void) ObjMgrGetEntityIDForChoice (sep);
    }
  }

  sip = SeqIdFree (sip);

  if (debug_mode) {
    t2 = time (NULL);
    if (t2 - t1 > 1) {
      printf ("Time to download %s from ID:%d\n", str, (int) (t2 - t1));
    }
  }

  return bsp;
}
/*
 * Returns non-zero when acc is non-empty, does not start with '-', and
 * consists only of ASCII letters, digits, '_', '.' and '-'.  Anything else
 * could be interpreted by the shell (or as a command-line option) once the
 * accession is pasted into the fetch command, so it is rejected.
 */
static int TPAAccessionIsShellSafe (const char *acc)

{
  char  ch;

  if (acc == NULL || *acc == '\0' || *acc == '-') return 0;

  for (; *acc != '\0'; acc++) {
    ch = *acc;
    if (ch >= 'A' && ch <= 'Z') continue;
    if (ch >= 'a' && ch <= 'z') continue;
    if (ch >= '0' && ch <= '9') continue;
    if (ch == '_' || ch == '.' || ch == '-') continue;
    return 0;
  }

  return 1;
}

/*
 * TPASmartBioseqFetchFunc
 *
 * Fetch callback (receives an OMProcControlPtr in data) that resolves a
 * SEQID_TPG Seq-id by running an external script and reading the record the
 * script writes to a temporary file.
 *
 * The script path is read once from the SEQUIN / TPASMART / FETCHSCRIPT
 * application parameter and cached in tpasmartfetchcmd.
 *
 * Returns:
 *   OM_MSG_RET_ERROR - bad or non-TPG input, accession with characters that
 *                      are unsafe in a shell command, no script configured,
 *                      command too long for the buffers, output file cannot
 *                      be opened, or no top Seq-entry for the data read
 *   OM_MSG_RET_OK    - the output file was opened but nothing could be read
 *   OM_MSG_RET_DONE  - output_data and output_entityID were filled in
 *
 * The temporary output file (and, on UNIX, the stderr capture file) is
 * removed before returning on every path after it has been named.
 */
static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)

{
  BioseqPtr         bsp;
  Char              cmmd [2 * PATH_MAX + 512];
  Pointer           dataptr;
  Uint2             datatype;
  Uint2             entityID;
  FILE*             fp;
  int               len;
  OMProcControlPtr  ompcp;
  ObjMgrProcPtr     ompp;
  Char              path [PATH_MAX];
  Char              err_path [PATH_MAX];
  Char              script [256];
  SeqEntryPtr       sep = NULL;
  SeqIdPtr          sip;
  TextSeqIdPtr      tsip;

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL) return OM_MSG_RET_ERROR;
  ompp = ompcp->proc;
  if (ompp == NULL) return OM_MSG_RET_ERROR;
  sip = (SeqIdPtr) ompcp->input_data;
  if (sip == NULL) return OM_MSG_RET_ERROR;

  /* this fetcher only handles third-party GenBank identifiers */
  if (sip->choice != SEQID_TPG) return OM_MSG_RET_ERROR;
  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;

  /* the accession is inserted into a shell command line below */
  if (! TPAAccessionIsShellSafe (tsip->accession)) return OM_MSG_RET_ERROR;

  if (tpasmartfetchcmd == NULL) {
    if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, script, sizeof (script))) {
      tpasmartfetchcmd = StringSaveNoNull (script);
    }
  }
  if (tpasmartfetchcmd == NULL) return OM_MSG_RET_ERROR;

  TmpNam (path);

#ifdef OS_UNIX
  len = snprintf (err_path, sizeof (err_path), "%s.err", path);
  if (len < 0 || (size_t) len >= sizeof (err_path)) {
    FileRemove (path);
    return OM_MSG_RET_ERROR;
  }
  len = snprintf (cmmd, sizeof (cmmd), "csh %s %s > %s 2>%s",
                  tpasmartfetchcmd, tsip->accession, path, err_path);
  if (len < 0 || (size_t) len >= sizeof (cmmd)) {
    /* never run a truncated command */
    FileRemove (path);
    FileRemove (err_path);
    return OM_MSG_RET_ERROR;
  }
  system (cmmd);
#endif
#ifdef OS_MSWIN
  len = snprintf (cmmd, sizeof (cmmd), "%s %s -o %s",
                  tpasmartfetchcmd, tsip->accession, path);
  if (len < 0 || (size_t) len >= sizeof (cmmd)) {
    /* never run a truncated command */
    FileRemove (path);
    return OM_MSG_RET_ERROR;
  }
  system (cmmd);
#endif

  /* the script's exit status is not examined; success is judged by whether
     the output file can be opened and parsed */
  fp = FileOpen (path, "r");
  if (fp == NULL) {
    FileRemove (path);
#ifdef OS_UNIX
    FileRemove (err_path);
#endif
    return OM_MSG_RET_ERROR;
  }

  dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
  FileClose (fp);
  FileRemove (path);
#ifdef OS_UNIX
  FileRemove (err_path);
#endif

  if (dataptr == NULL) return OM_MSG_RET_OK;

  sep = GetTopSeqEntryForEntityID (entityID);
  if (sep == NULL) return OM_MSG_RET_ERROR;

  bsp = BioseqFindInSeqEntry (sip, sep);
  ompcp->output_data = (Pointer) bsp;
  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);

  return OM_MSG_RET_DONE;
}
static void ProcessMultipleRecord ( CharPtr filename, CSpeedFlagPtr cfp ) { AsnIoPtr aip; AsnTypePtr atp; BioseqPtr bsp; Char buf [41]; Uint2 entityID; FILE *fp; SeqEntryPtr fsep; Char longest [41]; Int4 numrecords, x; SeqEntryPtr sep; time_t starttime, stoptime, worsttime; #ifdef OS_UNIX Char cmmd [256]; CharPtr gzcatprog; int ret; Boolean usedPopen = FALSE; #endif if (cfp == NULL) return; if (StringHasNoText (filename)) return; #ifndef OS_UNIX if (cfp->compressed) { Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines"); return; } #endif #ifdef OS_UNIX if (cfp->compressed) { gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY"); if (gzcatprog != NULL) { sprintf (cmmd, "%s %s", gzcatprog, filename); } else { ret = system ("gzcat -h >/dev/null 2>&1"); if (ret == 0) { sprintf (cmmd, "gzcat %s", filename); } else if (ret == -1) { Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease"); return; } else { ret = system ("zcat -h >/dev/null 2>&1"); if (ret == 0) { sprintf (cmmd, "zcat %s", filename); } else if (ret == -1) { Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease"); return; } else { Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable"); return; } } } fp = popen (cmmd, /* cfp->binary? "rb" : */ "r"); usedPopen = TRUE; } else { fp = FileOpen (filename, cfp->binary? "rb" : "r"); } #else fp = FileOpen (filename, cfp->binary? "rb" : "r"); #endif if (fp == NULL) { Message (MSG_POSTERR, "FileOpen failed for input file '%s'", filename); return; } aip = AsnIoNew (cfp->binary? 
ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL); if (aip == NULL) { Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", filename); return; } if (cfp->logfp != NULL) { fprintf (cfp->logfp, "%s\n\n", filename); fflush (cfp->logfp); } longest [0] = '\0'; worsttime = 0; numrecords = 0; atp = cfp->atp_bss; while ((atp = AsnReadId (aip, cfp->amp, atp)) != NULL) { if (atp == cfp->atp_se) { sep = SeqEntryAsnRead (aip, atp); if (sep != NULL) { entityID = ObjMgrGetEntityIDForChoice (sep); fsep = FindNthBioseq (sep, 1); if (fsep != NULL && fsep->choice == 1) { bsp = (BioseqPtr) fsep->data.ptrvalue; if (bsp != NULL) { SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf)); if (cfp->logfp != NULL) { fprintf (cfp->logfp, "%s\n", buf); fflush (cfp->logfp); } } } starttime = GetSecs (); for (x = 0; x < cfp->maxcount; x++) { DoProcess (sep, entityID, cfp); } stoptime = GetSecs (); if (stoptime - starttime > worsttime) { worsttime = stoptime - starttime; StringCpy (longest, buf); } numrecords++; ObjMgrFreeByEntityID (entityID); } } else { AsnReadVal (aip, atp, NULL); } } AsnIoFree (aip, FALSE); #ifdef OS_UNIX if (usedPopen) { pclose (fp); } else { FileClose (fp); } #else FileClose (fp); #endif if (cfp->logfp != NULL && (! StringHasNoText (longest))) { fprintf (cfp->logfp, "Longest processing time %ld seconds on %s\n", (long) worsttime, longest); fprintf (cfp->logfp, "Total number of records %ld\n", (long) numrecords); fflush (cfp->logfp); } }