/*
 * DoTitle
 *
 * Descriptor callback that looks for a few fixed phrases in a title
 * descriptor and hands the first one found to RecordTitle.
 *
 *   vnp      - descriptor to examine; anything other than a title is ignored
 *   userdata - ScanDataPtr passed through to RecordTitle
 *
 * Phrases are tried in this order:
 *   1. "complete <word> genome", "complete <word> DNA" or
 *      "complete <word> sequence" (the keyword compare is case-insensitive).
 *      The title buffer is truncated IN PLACE right after the keyword, and
 *      the text starting at "complete" is recorded.
 *   2. "genome DNA"
 *   3. "genomic DNA" (only when sdp->bulk is not set)
 *
 * If the word following "complete " runs to the end of the title, the
 * function returns without trying the fallback phrases.
 */
static void DoTitle (SeqDescrPtr vnp, Pointer userdata)

{
  Char         ch;
  int          keep;
  CharPtr      hit, title, word_end;
  ScanDataPtr  sdp;

  if (vnp->choice != Seq_descr_title) return;

  title = (CharPtr) vnp->data.ptrvalue;
  if (StringHasNoText (title)) return;

  sdp = (ScanDataPtr) userdata;

  for (hit = StringStr (title, "complete ");
       hit != NULL;
       hit = StringStr (word_end, "complete ")) {

    /* step over the single word that follows "complete " (9 characters) */
    word_end = hit + 9;
    ch = *word_end;
    while (ch != '\0' && (! (IS_WHITESP (ch)))) {
      word_end++;
      ch = *word_end;
    }
    if (ch == '\0') return;

    /* how many characters of the keyword (leading blank included) to keep */
    keep = 0;
    if (StringNICmp (word_end, " genome", 7) == 0) {
      keep = 7;
    } else if (StringNICmp (word_end, " DNA", 4) == 0) {
      keep = 4;
    } else if (StringNICmp (word_end, " sequence", 9) == 0) {
      keep = 9;
    }

    if (keep > 0) {
      /* cut the title right after the keyword so only the phrase is recorded */
      word_end [keep] = '\0';
      RecordTitle (sdp, hit);
      return;
    }
  }

  if (StringStr (title, "genome DNA") != NULL) {
    RecordTitle (sdp, "genome DNA");
    return;
  }

  /* the remaining phrase is only checked when not in bulk mode */
  if (sdp->bulk) return;

  if (StringStr (title, "genomic DNA") != NULL) {
    RecordTitle (sdp, "genomic DNA");
  }
}
/*
 * ExciseProteinIDLine
 *
 * Removes, in place, the first line of the form
 *     <newline><tab><tab><tab>protein_id<tab>...
 * from the text in 'line'.  Everything from the newline that starts that
 * line up to (not including) the next newline is cut out, and the rest of
 * the text is shifted down over it.
 *
 * Nothing is changed when 'line' has no text, when the marker is not
 * present, or when no newline follows the marker.
 */
static void ExciseProteinIDLine (CharPtr line)

{
  CharPtr  dst;
  CharPtr  src;

  if (StringHasNoText (line)) return;

  dst = StringStr (line, "\n\t\t\tprotein_id\t");
  if (dst == NULL) return;

  /* the newline that terminates the protein_id line */
  src = StringChr (dst + 1, '\n');
  if (src == NULL) return;

  /* slide the tail of the text down over the excised line */
  for (; *src != 0; src++, dst++) {
    *dst = *src;
  }
  *dst = 0;
}
/*
 * MMDBEvalPDB
 *
 * Runs a boolean query against the "Structure" database for 'str' and
 * returns the first UID in the reply.
 *
 *   str - query text; " [ACCN]" is appended unless the (possibly truncated)
 *         text already contains "[ACCN]"
 *
 * Returns 0 when 'str' is NULL, when the request cannot be built, when the
 * query or the reply extraction yields nothing, or when the reply carries
 * no UIDs.
 *
 * An older implementation based on EntrezTLEvalString used to be kept here
 * as commented-out code.
 */
DocUid LIBCALL MMDBEvalPDB(CharPtr str)

{
  Entrez2BooleanReplyPtr  reply;
  Entrez2IdListPtr        ids;
  Entrez2RequestPtr       request;
  Entrez2ReplyPtr         raw;
  Char                    query [61];
  Uint4                   uid = 0;

  if (str == NULL) return 0;

  /* bounded copy that leaves room in the buffer for the appended qualifier */
  StringNCpy_0 (query, str, sizeof (query) - 10);
  if (StringStr (query, "[ACCN]") == NULL) {
    StringCat (query, " [ACCN]");
  }

  request = EntrezCreateBooleanRequest (TRUE, FALSE, "Structure", query,
                                        0, 0, NULL, 1, 0);
  if (request == NULL) return 0;

  raw = EntrezSynchronousQuery (request);
  request = Entrez2RequestFree (request);
  if (raw == NULL) return 0;

  reply = EntrezExtractBooleanReply (raw);
  if (reply == NULL) return 0;

  ids = reply->uids;
  if (reply->count > 0 && ids != NULL && ids->num > 0 && ids->uids != NULL) {
    /* the first UID sits at the start of the byte store */
    BSSeek (ids->uids, 0, SEEK_SET);
    uid = Nlm_BSGetUint4 (ids->uids);
  }

  Entrez2BooleanReplyFree (reply);
  return uid;
}
Int2 Main (void) { AsnIoPtr aop = NULL; AsnModulePtr amp; AsnTypePtr atp_bss, atp_ss, atp_se; BioseqSet bss; FILE *fp; ValNodePtr head, vnp; Char path [PATH_MAX]; CharPtr progname, str, subfile; ErrSetFatalLevel (SEV_MAX); ErrClearOptFlags (EO_SHOW_USERSTR); UseLocalAsnloadDataAndErrMsg (); ErrPathReset (); if (! AllObjLoad ()) { Message (MSG_FATAL, "AllObjLoad failed"); return 1; } if (! SubmitAsnLoad ()) { Message (MSG_FATAL, "SubmitAsnLoad failed"); return 1; } if (! SeqCodeSetLoad ()) { Message (MSG_FATAL, "SeqCodeSetLoad failed"); return 1; } if (! GeneticCodeTableLoad ()) { Message (MSG_FATAL, "GeneticCodeTableLoad failed"); return 1; } MemSet ((Pointer) &bss, 0, sizeof (BioseqSet)); amp = AsnAllModPtr (); if (amp == NULL) { Message (MSG_FATAL, "Unable to load AsnAllModPtr"); return 1; } atp_bss = AsnFind ("Bioseq-set"); if (atp_bss == NULL) { Message (MSG_FATAL, "Unable to find ASN.1 type Bioseq-set"); return 1; } atp_ss = AsnFind ("Bioseq-set.seq-set"); if (atp_ss == NULL) { Message (MSG_FATAL, "Unable to find ASN.1 type Bioseq-set.seq-set"); return 1; } atp_se = AsnFind ("Bioseq-set.seq-set.E"); if (atp_se == NULL) { Message (MSG_FATAL, "Unable to find ASN.1 type Bioseq-set.seq-set.E"); return 1; } ProgramPath (path, sizeof (path)); progname = StringRChr (path, DIRDELIMCHR); if (progname != NULL) { progname++; } else { progname = "idcleanscan"; } if (! GetArgs (progname, sizeof (myargs) / sizeof (Args), myargs)) { return 0; } fp = FileOpen (myargs [o_argOutputFile].strvalue, "a"); if (fp == NULL) { Message (MSG_FATAL, "FileOpen failed"); return 1; } if (StringHasNoText (myargs [p_argInputPath].strvalue)) { str = myargs [i_argInputFile].strvalue; if (! StringHasNoText (str)) { DoReleaseFile (str, myargs [b_argBinaryFile].intvalue, myargs [c_argCompressed].intvalue, fp, aop, NULL); } } else { head = DirCatalog (myargs [p_argInputPath].strvalue); if (! 
StringHasNoText (myargs [s_argSubset].strvalue)) { aop = AsnIoOpen (myargs [s_argSubset].strvalue, /* "wb" */ "w"); AsnOpenStruct (aop, atp_bss, (Pointer) &bss); AsnOpenStruct (aop, atp_ss, (Pointer) bss.seq_set); /* av.intvalue = BioseqseqSet_class_genbank; AsnWrite (aop, atp_cls, &av); */ } for (vnp = head; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 0) { str = (CharPtr) vnp->data.ptrvalue; if (! StringHasNoText (str)) { subfile = myargs [x_argFileSelect].strvalue; if (StringHasNoText (subfile) || StringStr (str, subfile) != NULL) { #ifdef OS_UNIX /* printf ("%s\n", str); */ #endif DoReleaseFile (str, myargs [b_argBinaryFile].intvalue, myargs [c_argCompressed].intvalue, fp, aop, atp_se); } } } } if (aop != NULL) { AsnCloseStruct (aop, atp_ss, (Pointer) bss.seq_set); AsnCloseStruct (aop, atp_bss, (Pointer) &bss); AsnIoClose (aop); } ValNodeFreeData (head); } FileClose (fp); return 0; }
Int2 Main (void) { CharPtr base, directory, organism, ptr; Boolean altstart, findorf, flatfile, validate; ValNodePtr head, vnp; ErrSetFatalLevel (SEV_MAX); ErrClearOptFlags (EO_SHOW_USERSTR); UseLocalAsnloadDataAndErrMsg (); ErrPathReset (); if (! AllObjLoad ()) { Message (MSG_FATAL, "AllObjLoad failed"); return 1; } if (! SubmitAsnLoad ()) { Message (MSG_FATAL, "SubmitAsnLoad failed"); return 1; } if (! SeqCodeSetLoad ()) { Message (MSG_FATAL, "SeqCodeSetLoad failed"); return 1; } if (! GeneticCodeTableLoad ()) { Message (MSG_FATAL, "GeneticCodeTableLoad failed"); return 1; } if (! FeatDefSetLoad ()) { Message (MSG_FATAL, "FeatDefSetLoad failed"); return 1; } if (! GetArgs ("sgd2asn", sizeof (myargs) / sizeof (Args), myargs)) { return 0; } directory = (CharPtr) myargs [0].strvalue; base = (CharPtr) myargs [1].strvalue; organism = (CharPtr) myargs [2].strvalue; findorf = (Boolean) myargs [3].intvalue; altstart = (Boolean) myargs [4].intvalue; validate = (Boolean) myargs [5].intvalue; flatfile = (Boolean) myargs [6].intvalue; if (! StringHasNoText (base)) { ptr = StringStr (base, ".fsa"); if (ptr != NULL) { *ptr = '\0'; ProcessOneRecord (directory, base, organism, findorf, altstart, validate, flatfile); } } else { head = DirCatalog (directory); for (vnp = head; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 0) { base = (CharPtr) vnp->data.ptrvalue; if (! StringHasNoText (base)) { ptr = StringStr (base, ".fsa"); if (ptr != NULL) { *ptr = '\0'; ProcessOneRecord (directory, base, organism, findorf, altstart, validate, flatfile); } } } } ValNodeFreeData (head); } return 0; }
/*
 * s_ProcessOtherLine
 *
 * Scans one line of text for a fixed set of keywords and stores what it
 * finds in the alignment configuration.
 *
 *   configPtr   - configuration to update
 *   otherStr    - line to scan
 *   fileInfoPtr - not used by this function
 *
 * Keywords (each accepted in lower case, UPPER case and one mixed-case
 * spelling, matched as substrings anywhere in the line):
 *   datatype               -> declaredInfo.dataType ("DNA", compared
 *                             case-insensitively, means nucleotide;
 *                             any other value means protein)
 *   interleaved/contiguous -> declaredInfo.contigOrInter
 *   ntax                   -> declaredInfo.idCount   (via atoi)
 *   nchar                  -> declaredInfo.seqLength (via atoi)
 *   missing / gap / unaligned
 *                          -> the corresponding one-character string is
 *                             replaced with the first character of the
 *                             value; if the new character equals one of
 *                             the other two, that other one is blanked
 *
 * The value returned by s_OtherGetValue is checked before it is indexed
 * or passed to atoi; when no value is available the corresponding
 * setting is left untouched.
 *
 * NOTE(review): the previous missingChar / gapChar / unalignedChar buffer
 * is overwritten without being freed, exactly as before.  Who owns those
 * buffers is not visible here -- confirm before adding a MemFree.
 *
 * Always returns TRUE.
 */
static Boolean s_ProcessOtherLine (AliConfigInfoPtr configPtr,
                                   CharPtr          otherStr,
                                   AlignFileDataPtr fileInfoPtr)
{
  CharPtr strPtr;
  CharPtr tmpStr;

  /* Check for datatype declaration */

  if (((strPtr = StringStr (otherStr, "datatype")) != NULL) ||
      ((strPtr = StringStr (otherStr, "DATATYPE")) != NULL) ||
      ((strPtr = StringStr (otherStr, "Datatype")) != NULL))
    {
      tmpStr = s_OtherGetValue (strPtr);
      if (StringICmp (tmpStr, "DNA") == 0)
        configPtr->declaredInfo.dataType = ALI_DATA_NUCLEOTIDE;
      else
        configPtr->declaredInfo.dataType = ALI_DATA_PROTEIN;
      MemFree (tmpStr);
    }

  /* Check for interleaved/contiguous */

  if (((strPtr = StringStr (otherStr, "INTERLEAVED")) != NULL) ||
      ((strPtr = StringStr (otherStr, "interleaved")) != NULL) ||
      ((strPtr = StringStr (otherStr, "Interleaved")) != NULL))
    configPtr->declaredInfo.contigOrInter = ALI_INTERLEAVED;
  else if (((strPtr = StringStr (otherStr, "CONTIGUOUS")) != NULL) ||
           ((strPtr = StringStr (otherStr, "contiguous")) != NULL) ||
           ((strPtr = StringStr (otherStr, "Contiguous")) != NULL))
    configPtr->declaredInfo.contigOrInter = ALI_CONTIGUOUS;

  /* Check for dimensions */

  if (((strPtr = StringStr (otherStr, "NTAX")) != NULL) ||
      ((strPtr = StringStr (otherStr, "ntax")) != NULL) ||
      ((strPtr = StringStr (otherStr, "nTax")) != NULL))
    {
      tmpStr = s_OtherGetValue (strPtr);
      if (tmpStr != NULL)
        {
          configPtr->declaredInfo.idCount = atoi (tmpStr);
          MemFree (tmpStr);
        }
    }

  if (((strPtr = StringStr (otherStr, "NCHAR")) != NULL) ||
      ((strPtr = StringStr (otherStr, "nchar")) != NULL) ||
      ((strPtr = StringStr (otherStr, "nChar")) != NULL))
    {
      tmpStr = s_OtherGetValue (strPtr);
      if (tmpStr != NULL)
        {
          configPtr->declaredInfo.seqLength = atoi (tmpStr);
          MemFree (tmpStr);
        }
    }

  /* Check for definition of missing character */

  if (((strPtr = StringStr (otherStr, "MISSING")) != NULL) ||
      ((strPtr = StringStr (otherStr, "missing")) != NULL) ||
      ((strPtr = StringStr (otherStr, "Missing")) != NULL))
    {
      tmpStr = s_OtherGetValue (strPtr);
      if (tmpStr != NULL)
        {
          configPtr->missingChar = (CharPtr) MemNew (2);
          if (configPtr->missingChar != NULL)
            sprintf (configPtr->missingChar, "%c", tmpStr[0]);
          MemFree (tmpStr);

          /* If the new missing char conflicts with the */
          /* gap or unaligned char, then blank them out */
          /* to give the new one precedence.            */

          if (StringICmp (configPtr->missingChar,
                          configPtr->gapChar) == 0)
            StringCpy (configPtr->gapChar, "");
          if (StringICmp (configPtr->missingChar,
                          configPtr->unalignedChar) == 0)
            StringCpy (configPtr->unalignedChar, "");
        }
    }

  /* Check for definition of gap character */

  if (((strPtr = StringStr (otherStr, "GAP")) != NULL) ||
      ((strPtr = StringStr (otherStr, "gap")) != NULL) ||
      ((strPtr = StringStr (otherStr, "Gap")) != NULL))
    {
      tmpStr = s_OtherGetValue (strPtr);
      if (tmpStr != NULL)
        {
          configPtr->gapChar = (CharPtr) MemNew (2);
          if (configPtr->gapChar != NULL)
            sprintf (configPtr->gapChar, "%c", tmpStr[0]);
          MemFree (tmpStr);

          /* If the new gap char conflicts with the missing */
          /* or unaligned char, then blank them out to give */
          /* the new one precedence.                        */

          if (StringICmp (configPtr->gapChar,
                          configPtr->missingChar) == 0)
            StringCpy (configPtr->missingChar, "");
          if (StringICmp (configPtr->gapChar,
                          configPtr->unalignedChar) == 0)
            StringCpy (configPtr->unalignedChar, "");
        }
    }

  /* Check for definition of unaligned character */

  if (((strPtr = StringStr (otherStr, "UNALIGNED")) != NULL) ||
      ((strPtr = StringStr (otherStr, "unaligned")) != NULL) ||
      ((strPtr = StringStr (otherStr, "Unaligned")) != NULL))
    {
      tmpStr = s_OtherGetValue (strPtr);
      if (tmpStr != NULL)
        {
          configPtr->unalignedChar = (CharPtr) MemNew (2);
          if (configPtr->unalignedChar != NULL)
            sprintf (configPtr->unalignedChar, "%c", tmpStr[0]);
          MemFree (tmpStr);

          /* If the new unaligned char conflicts with the */
          /* gap or missing char, then blank them out to  */
          /* give the new one precedence.                 */

          if (StringICmp (configPtr->unalignedChar,
                          configPtr->gapChar) == 0)
            StringCpy (configPtr->gapChar, "");
          if (StringICmp (configPtr->unalignedChar,
                          configPtr->missingChar) == 0)
            StringCpy (configPtr->missingChar, "");
        }
    }

  /* Return successfully */

  return TRUE;
}
/*
 * FileRecurse
 *
 * Walks 'directory' and processes every file whose name ends in
 * osp->suffix, descending into subdirectories along the way.
 *
 *   directory - directory to scan
 *   isp       - input stream settings; directory and base are pointed at
 *               the current file for the duration of each call and then
 *               restored
 *   osp       - output stream settings; base is pointed at the current
 *               file name (suffix removed) during each call and reset to
 *               NULL afterwards
 *   asp       - passed through to ProcessStream
 *   gap_sizes - passed through to the processing functions
 *
 * Files are handled by ProcessStream when isp->is_binary is set and by
 * ProcessOneRecord otherwise.  The results of all calls, including the
 * recursive ones, are OR-ed together.
 *
 * return -1 on failure, 0 on success
 */
static Int4 FileRecurse (
  CharPtr directory,
  InputStreamPtr isp,
  OutputStreamPtr osp,
  AsnStreamPtr asp,
  Int4Ptr gap_sizes
)

{
  Char        subdir [PATH_MAX];
  CharPtr     name, tail;
  CharPtr     saved_dir, saved_base;
  ValNodePtr  entries, item;
  Int4        result = 0;

  /* get list of all files in source directory */

  entries = DirCatalog (directory);

  for (item = entries; item != NULL; item = item->next) {
    switch (item->choice) {

      case 0 :
        /* plain file */
        name = (CharPtr) item->data.ptrvalue;
        if (! StringDoesHaveText (name)) break;

        /* does filename have desired substring? */
        tail = StringStr (name, osp->suffix);
        if (tail == NULL) break;

        /* make sure detected suffix is really at end of filename */
        if (StringCmp (tail, osp->suffix) != 0) break;

        /* chop the suffix off so 'name' is the bare base name */
        *tail = '\0';

        /* point both streams at this file, remembering the old input values */
        osp->base = name;
        saved_dir = isp->directory;
        saved_base = isp->base;
        isp->directory = directory;
        isp->base = name;

        /* process file that has desired suffix (usually .fsa) */
        if (isp->is_binary) {
          result |= ProcessStream (isp, osp, asp, gap_sizes);
        } else {
          result |= ProcessOneRecord (directory, osp, gap_sizes);
        }

        isp->directory = saved_dir;
        isp->base = saved_base;
        osp->base = NULL;
        break;

      case 1 :
        /* recurse into subdirectory */
        StringNCpy_0 (subdir, directory, sizeof (subdir));
        name = (CharPtr) item->data.ptrvalue;
        FileBuildPath (subdir, name, NULL);
        result |= FileRecurse (subdir, isp, osp, asp, gap_sizes);
        break;

      default :
        break;
    }
  }

  /* clean up file list */

  ValNodeFreeData (entries);

  return result;
}
/*
 * FirstVecScreenCallback
 *
 * Connection callback for the initial reply.  The reply is copied to a
 * temporary file and read back line by line (the file is removed before
 * returning).
 *
 *   conn     - connection whose results are copied to the temporary file
 *   userdata - VQueuePtr describing the pending request
 *   status   - not used by this function
 *
 * Lines starting with ">Vector":
 *   - with both "RID: " and " RTOE: ": the request ID is stored in
 *     cqp->rid, the number after " RTOE: " is stored in cqp->estTime
 *     (presumably an estimated completion time in seconds -- confirm
 *     against the service documentation), cqp->secondsToWait is set to
 *     that number plus 2, capped at 15, and cqp->announceproc (if set) is
 *     called.  If the number cannot be parsed, the wait is 15 seconds and
 *     announceproc receives 0.
 *   - without "RID: " but containing "FAILED": the request is marked done
 *     and cqp->resultproc (if set) is called with a NULL path.
 *
 * Lines starting with ">Message":
 *   the following lines up to one starting with "//" are posted with
 *   Message; a line containing "FAILURE" marks the request done.
 *
 * Always returns TRUE.
 */
static Boolean LIBCALLBACK FirstVecScreenCallback (
  CONN conn,
  Nlm_VoidPtr userdata,
  EIO_Status status
)

{
  VQueuePtr  cqp;
  FILE       *fp;
  Char       line [256];
  Char       path [PATH_MAX];
  CharPtr    rid;
  CharPtr    rtoe;
  CharPtr    str;
  long int   val = 0;

  /* read rID or failure message */

  cqp = (VQueuePtr) userdata;

  TmpNam (path);
  fp = FileOpen (path, "w");
  QUERY_CopyResultsToFile (conn, fp);
  FileClose (fp);

  fp = FileOpen (path, "r");
  str = ReadALine (line, sizeof (line), fp);
  while (str != NULL) {
    if (! StringHasNoText (line)) {
      if (line [0] == '>') {
        if (StringNICmp (line, ">Vector", 7) == 0) {
          rid = StringStr (line, "RID: ");
          if (rid != NULL) {
            rid += 5;
            rtoe = StringStr (rid, " RTOE: ");
            if (rtoe != NULL) {
              /* terminate the RID text and step to the number after " RTOE: " */
              *rtoe = '\0';
              rtoe += 7;
              StringNCpy_0 (cqp->rid, rid, sizeof (cqp->rid));

              /*
               * sscanf leaves val untouched when nothing matches, so reset
               * it first; it is passed to announceproc below either way.
               */
              val = 0;
              if (sscanf (rtoe, "%ld", &val) == 1) {
                cqp->estTime = (time_t) val;
                /*
                 * Wait two seconds past the estimate, but never more than
                 * 15.  Comparing before narrowing keeps a large estimate
                 * from wrapping to a negative wait.
                 */
                if (val >= 13) {
                  cqp->secondsToWait = 15;
                } else {
                  cqp->secondsToWait = (Int2) (val + 2);
                }
              } else {
                cqp->secondsToWait = 15;
              }

              if (cqp->announceproc != NULL) {
                cqp->announceproc (rid, cqp->seqid, (Int2) val);
              }
            }
          } else if (StringStr (line, "FAILED") != NULL) {
            cqp->done = TRUE;
            if (cqp->resultproc != NULL) {
              cqp->resultproc (NULL, cqp->userdata, cqp->rid, cqp->seqid, FALSE);
            }
          }
        } else if (StringNICmp (line, ">Message", 8) == 0) {
          /* relay the message body, which ends at a line starting with "//" */
          str = ReadALine (line, sizeof (line), fp);
          while (str != NULL && StringNCmp (line, "//", 2) != 0) {
            Message (MSG_POST, "%s\n", str);
            if (StringStr (line, "FAILURE") != NULL) {
              cqp->done = TRUE;
            }
            str = ReadALine (line, sizeof (line), fp);
          }
        }
      }
    }
    str = ReadALine (line, sizeof (line), fp);
  }
  FileClose (fp);

  FileRemove (path);

  return TRUE;
}
/*
 * SecondVecScreenCallback
 *
 * Connection callback for a status/result reply.  The reply is copied to
 * a temporary file and read back line by line (the file is removed before
 * returning).
 *
 *   conn     - connection whose results are copied to the temporary file
 *   userdata - VQueuePtr describing the pending request
 *   status   - not used by this function
 *
 * Lines starting with ">Vector" that carry both "RID: " and " Status: ":
 *   - RID different from cqp->rid : error posted, request marked done
 *   - status contains "FAILED"    : request marked done
 *   - status contains "unknown"   : error posted, request marked done
 *   - status contains "SUCCESS"   : success noted
 *   - status contains "WAITING"   : waiting noted
 *
 * Lines starting with ">Message":
 *   the following lines up to one starting with "//" are posted with
 *   Message; a line containing "FAILURE" marks the request done unless a
 *   WAITING status was seen earlier in the same reply.
 *
 * Afterwards cqp->resultproc, if set, is called with the temporary file
 * path on success, or with a NULL path when the request is marked done
 * without success.  On success the request is marked done as well.
 *
 * Always returns TRUE.
 */
static Boolean LIBCALLBACK SecondVecScreenCallback (
  CONN conn,
  Nlm_VoidPtr userdata,
  EIO_Status status
)

{
  VQueuePtr  cqp;
  FILE       *fp;
  Char       line [256];
  Char       path [PATH_MAX];
  CharPtr    rid;
  CharPtr    sttus;
  CharPtr    str;
  Boolean    success = FALSE;
  Boolean    waiting = FALSE;

  /* look for waiting, failure, or success */

  cqp = (VQueuePtr) userdata;

  TmpNam (path);
  fp = FileOpen (path, "w");
  QUERY_CopyResultsToFile (conn, fp);
  FileClose (fp);

  fp = FileOpen (path, "r");
  str = ReadALine (line, sizeof (line), fp);
  while (str != NULL) {
    if (! StringHasNoText (line)) {
      if (line [0] == '>') {
        if (StringNICmp (line, ">Vector", 7) == 0) {
          rid = StringStr (line, "RID: ");
          if (rid != NULL) {
            rid += 5;
            sttus = StringStr (rid, " Status: ");
            if (sttus != NULL) {
              /* terminate the RID text and step to the text after " Status: " */
              *sttus = '\0';
              sttus += 9;
              if (StringCmp (cqp->rid, rid) != 0) {
                ErrPostEx (SEV_ERROR, 0, 0, "RID mismatch '%s' vs '%s'", cqp->rid, rid);
                cqp->done = TRUE;
              } else if (StringStr (sttus, "FAILED") != NULL) {
                cqp->done = TRUE;
              } else if (StringStr (sttus, "unknown") != NULL) {
                ErrPostEx (SEV_ERROR, 0, 0, "RID unknown '%s'", rid);
                cqp->done = TRUE;
              } else if (StringStr (sttus, "SUCCESS") != NULL) {
                success = TRUE;
              } else if (StringStr (sttus, "WAITING") != NULL) {
                waiting = TRUE;
              }
            }
          }
        } else if (StringNICmp (line, ">Message", 8) == 0) {
          /* relay the message body, which ends at a line starting with "//" */
          str = ReadALine (line, sizeof (line), fp);
          while (str != NULL && StringNCmp (line, "//", 2) != 0) {
            Message (MSG_POST, "%s\n", str);
            if (StringStr (line, "FAILURE") != NULL) {
              if (! waiting) {
                cqp->done = TRUE;
              }
            }
            str = ReadALine (line, sizeof (line), fp);
          }
        }
      }
    }
    str = ReadALine (line, sizeof (line), fp);
  }
  FileClose (fp);

  /*
   * resultproc is optional (FirstVecScreenCallback guards it the same
   * way), so check it before calling.  The temporary file is still on
   * disk while the success handler runs.
   */
  if (success) {
    if (cqp->resultproc != NULL) {
      cqp->resultproc (path, cqp->userdata, cqp->rid, cqp->seqid, success);
    }
    cqp->done = TRUE;
  } else if (cqp->done) {
    if (cqp->resultproc != NULL) {
      cqp->resultproc (NULL, cqp->userdata, cqp->rid, cqp->seqid, success);
    }
  }

  FileRemove (path);

  return TRUE;
}
Int2 Main (void) { CharPtr base, directory, outfile, suffix, ptr; BioseqSetPtr bssp; ValNodePtr head, vnp; SeqEntryPtr sep; SeqSubmitPtr ssp; /* standard setup */ ErrSetFatalLevel (SEV_MAX); ErrClearOptFlags (EO_SHOW_USERSTR); UseLocalAsnloadDataAndErrMsg (); ErrPathReset (); /* finish resolving internal connections in ASN.1 parse tables */ if (! AllObjLoad ()) { Message (MSG_FATAL, "AllObjLoad failed"); return 1; } if (! SubmitAsnLoad ()) { Message (MSG_FATAL, "SubmitAsnLoad failed"); return 1; } if (! FeatDefSetLoad ()) { Message (MSG_FATAL, "FeatDefSetLoad failed"); return 1; } if (! SeqCodeSetLoad ()) { Message (MSG_FATAL, "SeqCodeSetLoad failed"); return 1; } if (! GeneticCodeTableLoad ()) { Message (MSG_FATAL, "GeneticCodeTableLoad failed"); return 1; } /* process command line arguments */ if (! GetArgs ("subfuse", sizeof (myargs) / sizeof (Args), myargs)) { return 0; } directory = (CharPtr) myargs [p_argInputPath].strvalue; outfile = (CharPtr) myargs [o_argOutputFile].strvalue; suffix = (CharPtr) myargs [x_argSuffix].strvalue; bssp = BioseqSetNew (); if (bssp == NULL) return 0; bssp->_class = BioseqseqSet_class_genbank; sep = SeqEntryNew (); if (sep == NULL) return 0; sep->choice = 2; sep->data.ptrvalue = (Pointer) bssp; ssp = SeqSubmitNew (); if (ssp == NULL) return 0; ssp->datatype = 1; ssp->data = (Pointer) sep; /* get list of all files in source directory */ head = DirCatalog (directory); for (vnp = head; vnp != NULL; vnp = vnp->next) { if (vnp->choice == 0) { base = (CharPtr) vnp->data.ptrvalue; if (! StringHasNoText (base)) { ptr = StringStr (base, suffix); if (ptr != NULL) { *ptr = '\0'; Message (MSG_POST, "Processing %s\n", base); ProcessOneRecord (ssp, bssp, directory, base, suffix); } } } } /* clean up file list */ ValNodeFreeData (head); /* write output file */ WriteOneSubmission (outfile, ssp); return 0; }