/*
 * SmartGuessMakeId
 *
 * Builds a SeqId from free-form text by guessing which database tag to
 * prepend before handing the text to MakeSeqID:
 *   - text that already contains '|' is passed through unchanged;
 *   - text for which IsAllDigits is true becomes "gi|<text>";
 *   - text containing '_' becomes "oth|<text>";
 *   - anything else becomes "gb|<text>".
 *
 * Returns NULL for blank input or when the scratch buffer cannot be
 * allocated; otherwise returns whatever MakeSeqID returns.
 */
static SeqIdPtr SmartGuessMakeId (CharPtr str)
{
  CharPtr   id_txt;
  CharPtr   prefix;
  SeqIdPtr  sip;

  if (StringHasNoText (str)) {
    return NULL;
  }
  if (StringChr (str, '|') != NULL) {
    return MakeSeqID (str);
  }

  if (IsAllDigits (str)) {
    prefix = "gi|";
  } else if (StringChr (str, '_') != NULL) {
    prefix = "oth|";
  } else {
    prefix = "gb|";
  }

  /* prefix + text + terminating NUL */
  id_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix) + StringLen (str) + 1));
  if (id_txt == NULL) {
    /* never format into a failed allocation */
    return NULL;
  }
  sprintf (id_txt, "%s%s", prefix, str);
  sip = MakeSeqID (id_txt);
  MemFree (id_txt);
  return sip;
}
/*
 * IsProteinChar
 *
 * Returns TRUE when ch is one of the accepted protein residue characters:
 * the letters A-Z and a-z except J/j and O/o, plus '*'.
 *
 * The NUL character is rejected explicitly.  NOTE(review): StringChr is
 * assumed to follow strchr semantics, where searching for '\0' matches the
 * string terminator and would otherwise report NUL as a valid residue.
 */
Boolean IsProteinChar (Char ch)
{
  if (ch == '\0') {
    return FALSE;
  }
  return (StringChr ("ABCDEFGHIKLMNPQRSTUVWXYZ*abcdefghiklmnpqrstuvwxyz", ch) != NULL) ? TRUE : FALSE;
}
/*
 * IsNucleotideChar
 *
 * Returns TRUE when ch is one of the accepted nucleotide characters
 * (a b c d g h k m n r s t u v w x y, in either case).
 *
 * The NUL character is rejected explicitly.  NOTE(review): StringChr is
 * assumed to follow strchr semantics, where searching for '\0' matches the
 * string terminator and would otherwise report NUL as a valid base.
 */
Boolean IsNucleotideChar (Char ch)
{
  if (ch == '\0') {
    return FALSE;
  }
  return (StringChr ("abcdghkmnrstuvwxyABCDGHKMNRSTUVWXY", ch) != NULL) ? TRUE : FALSE;
}
/*
 * IsValidIdChar
 *
 * Classifies a character that may appear in a sequence ID.
 *
 * Returns ID_GOOD_CHAR_LETTER for A-Z / a-z, ID_GOOD_CHAR_NUMBER for 0-9,
 * ID_GOOD_CHAR_OTHER for one of  " . _ - |  and FALSE for anything else.
 *
 * The NUL character is rejected explicitly.  NOTE(review): StringChr is
 * assumed to follow strchr semantics, where searching for '\0' matches the
 * string terminator and would otherwise classify NUL as a letter.
 */
Int2 IsValidIdChar (Char idChar)
{
  if (idChar == '\0') {
    return FALSE;
  }
  if (StringChr ("ABCDEFGHIJKLMNOPQRSTUVWXYZ", idChar) != NULL) {
    return ID_GOOD_CHAR_LETTER;
  }
  if (StringChr ("abcdefghijklmnopqrstuvwxyz", idChar) != NULL) {
    return ID_GOOD_CHAR_LETTER;
  }
  if (StringChr ("0123456789", idChar) != NULL) {
    return ID_GOOD_CHAR_NUMBER;
  }
  if (StringChr ("\"._-|", idChar) != NULL) {
    return ID_GOOD_CHAR_OTHER;
  }
  return FALSE;
}
/*
 * ExciseProteinIDLine
 *
 * Removes, in place, the first line of the form
 *     "\n\t\t\tprotein_id\t..."
 * from the text in `line`.  The text from the next '\n' onward is shifted
 * down over the removed line.  Nothing is changed when the text is blank,
 * the marker is absent, or no '\n' follows the marker.
 */
static void ExciseProteinIDLine (CharPtr line)
{
  CharPtr  dst;   /* write position: start of the line being removed */
  CharPtr  src;   /* read position: newline that begins the following line */

  if (StringHasNoText (line)) {
    return;
  }

  dst = StringStr (line, "\n\t\t\tprotein_id\t");
  if (dst == NULL) {
    return;
  }

  /* skip the leading '\n' of the marker when looking for the line end */
  src = StringChr (dst + 1, '\n');
  if (src == NULL) {
    return;
  }

  for (; *src != 0; src++, dst++) {
    *dst = *src;
  }
  *dst = 0;
}
/*
 * WriteStdoutRamaOneModel
 *
 * Prints one line per record returned by Rama() for the given model, in the
 * form "chain, residue, number, phi, psi".  Residue nodes are collected from
 * pmsdRoot with ConvertNode(..., AM_MGD); nothing is printed when that
 * returns NULL.  The record list is released with freeRS before returning.
 */
void WriteStdoutRamaOneModel(PMSD pmsdRoot, Int2 ModelNum )
{
  CharPtr     alphabet = "-ABCDEFGHIKLMNPQRSTVWXYZU*";
  ValNodePtr  residues;
  PRS         results;
  PRS         cur;
  PMGD        residue;
  Char        tail[30];
  Char        chain[30];
  Char        aa[3];
  float       phi;
  float       psi;
  int         num;

  residues = ConvertNode ((PFB) pmsdRoot, AM_MGD);
  if (residues == NULL) {
    return;
  }

  results = Rama (residues, ModelNum);
  for (cur = results; cur; cur = cur->next) {
    residue = (PMGD) cur->pfbThis;
    phi = (float) cur->Phi;
    psi = (float) cur->Psi;
    num = (int) (residue->pdnmgLink->choice);

    /* Copy the alphabet from the residue letter onward; only the first
       character of that copy is used as the one-letter code. */
    StringCpy (tail, StringChr (alphabet, residue->pcIUPAC[0]));
    StringCpy (chain, ParentMolName ((PFB) residue));
    aa[0] = tail[0];
    aa[1] = '\0';

    printf ("%s, %s, %d, %f, %f\n", chain, aa, (int) num, phi, psi);
  }

  freeRS (results);
}
/*
 * FieldsFromFieldListString
 *
 * Converts a comma-separated list of source qualifier names into a ValNode
 * list.  Each name is looked up with GetSourceQualTypeByName; a recognised
 * name adds a FieldType_source_qual entry whose payload is a
 * SourceQualChoice_textqual node carrying the qualifier value, while an
 * unrecognised name is reported through Message (MSG_ERROR, ...) and skipped.
 *
 * Returns NULL for blank input; otherwise the list built (possibly NULL if
 * no name was recognised).
 */
static ValNodePtr FieldsFromFieldListString (CharPtr str)
{
  ValNodePtr  fields = NULL;
  ValNodePtr  choice;
  CharPtr     scratch;
  CharPtr     name;
  CharPtr     sep;
  Int4        qual;

  if (StringHasNoText (str)) {
    return NULL;
  }

  /* work on a private copy so names can be NUL-terminated in place */
  scratch = StringSave (str);
  name = scratch;

  for (;;) {
    sep = StringChr (name, ',');
    if (sep != NULL) {
      *sep = 0;
    }

    qual = GetSourceQualTypeByName (name);
    if (qual < 0) {
      Message (MSG_ERROR, "%s is not a recognized source field name", name);
    } else {
      choice = ValNodeNew (NULL);
      choice->choice = SourceQualChoice_textqual;
      choice->data.intvalue = qual;
      ValNodeAddPointer (&fields, FieldType_source_qual, choice);
    }

    if (sep == NULL) {
      break;            /* that was the last name */
    }
    name = sep + 1;
  }

  scratch = MemFree (scratch);
  return fields;
}
/*
 * DoNewFastaDefline
 *
 * Per-Bioseq callback: writes ">id title" to cfp->ofp, where the title comes
 * from NewCreateDefLine and "?" is substituted when the title is blank.
 *
 * userdata must be a CSpeedFlagPtr.  Its `skip` string suppresses output:
 *   's' - skip Bioseqs whose parent set is a segset or parts set;
 *   'v' - skip virtual Bioseqs.
 *
 * The title is still generated when cfp->ofp is NULL, but nothing is
 * written.
 */
static void DoNewFastaDefline ( BioseqPtr bsp, Pointer userdata )
{
  BioseqSetPtr   bssp;
  CSpeedFlagPtr  cfp;
  Char           id [128];
  CharPtr        title;

  if (bsp == NULL) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;

  if (StringChr (cfp->skip, 's') != NULL) {
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
      if (bssp != NULL) {
        if (bssp->_class == BioseqseqSet_class_segset ||
            bssp->_class == BioseqseqSet_class_parts) return;
      }
    }
  }

  if (StringChr (cfp->skip, 'v') != NULL) {
    if (bsp->repr == Seq_repr_virtual) return;
  }

  id [0] = '\0';
  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);

  title = NewCreateDefLine (NULL, bsp, FALSE, FALSE);
  if (StringHasNoText (title)) {
    /* A blank title may still be an allocated string: release it before
       substituting the placeholder so it is not leaked. */
    MemFree (title);
    title = StringSave ("?");
  }

  if (cfp->ofp != NULL) {
    fprintf (cfp->ofp, ">%s %s\n", id, title);
  }

  MemFree (title);
}
/*
 * s_OtherGetValue
 *
 * Extracts the value from a "name = value" string: the run of characters
 * that follows the first '=' and any spaces after it, up to the next space
 * or the end of the string.
 *
 * Returns a buffer obtained from MemNew holding the NUL-terminated value
 * (this function does not free it), or NULL when there is no '=', no value
 * after it, or the allocation fails.
 *
 * The buffer is at least ALI_MAX_LINE_LEN bytes, as before, but grows when
 * the value is longer so the copy can never run past the allocation.
 */
CharPtr s_OtherGetValue (CharPtr otherStr)
{
  CharPtr  valueStart;
  CharPtr  valueEnd;
  CharPtr  valueBuff;
  size_t   valueLen;
  size_t   buffLen;
  size_t   i;

  /* Go to the first character after the '=' */

  valueStart = StringChr (otherStr, '=');
  if (valueStart == NULL) {
    return NULL;
  }
  valueStart++;

  /* Skip spaces */

  while (*valueStart == ' ') {
    valueStart++;
  }
  if (*valueStart == '\0') {
    return NULL;
  }

  /* Measure the value before allocating */

  valueEnd = valueStart;
  while ((*valueEnd != ' ') && (*valueEnd != '\0')) {
    valueEnd++;
  }
  valueLen = (size_t) (valueEnd - valueStart);

  buffLen = (size_t) ALI_MAX_LINE_LEN;
  if (buffLen < valueLen + 1) {
    buffLen = valueLen + 1;
  }

  valueBuff = (CharPtr) MemNew (buffLen);
  if (valueBuff == NULL) {
    return NULL;
  }

  /* Get the value */

  for (i = 0; i < valueLen; i++) {
    valueBuff[i] = valueStart[i];
  }
  valueBuff[valueLen] = '\0';

  /* Return successfully */

  return valueBuff;
}
/*
 * s_ParseDefLine
 *
 * Parses a line of the form ">seqid definition..." with a small state
 * machine:
 *   DEFLINE_PRE_DATA   - skip whitespace; '>' starts the ID, anything else
 *                        means the line is not a defline (returns NULL).
 *   DEFLINE_SEQID      - collect characters accepted by IsValidIdChar.
 *                        Whitespace before any ID character is skipped;
 *                        whitespace after one, or a '[', begins the
 *                        definition (that character is kept in it).  Any
 *                        other character records ERR_INVALID_DEFLINE and
 *                        returns NULL.
 *   DEFLINE_DEFINITION - copy the rest of the line.
 *
 * A definition lacking '[' or ']' records ERR_DEFLINE_NODEFS but the line
 * is still accepted.
 *
 * Returns a DefLineInfo obtained from MemNew whose `id` / `definitions`
 * members are either NULL (empty) or buffers obtained from MemNew; returns
 * NULL for blank lines, non-deflines, invalid deflines, or allocation
 * failure of the result.
 */
static DefLineInfoPtr s_ParseDefLine (CharPtr lineStr, Int4 rowNum, ErrInfoPtr PNTR errorListPtr)
{
  Char            ch;
  CharPtr         defStr;
  CharPtr         idStr;
  Int4            defPosition;
  Int4            idPosition;
  Int4            position;
  Int2            state;
  DefLineInfoPtr  defLinePtr = NULL;
  ErrInfoPtr      errPtr;

  defPosition = 0;
  idPosition  = 0;

  /* Neither part can be longer than the whole line */

  defStr = (CharPtr) MemNew (StringLen(lineStr)+1);
  idStr  = (CharPtr) MemNew (StringLen(lineStr)+1);

  /* Parse the line character by character */

  state = DEFLINE_PRE_DATA;

  for (position = 0; lineStr[position] != '\0'; position++)
    {
      ch = lineStr[position];

      switch (state)
        {
        case DEFLINE_PRE_DATA :
          if (IS_WHITESP(ch))
            continue;
          else if (ch == '>')
            state = DEFLINE_SEQID;
          else
            {
              MemFree(defStr);
              MemFree(idStr);
              return NULL;  /* Not a defline */
            }
          break;
        case DEFLINE_SEQID :
          if (IsValidIdChar(ch))
            {
              idStr[idPosition] = ch;
              idPosition++;
            }
          else if (IS_WHITESP(ch))
            {
              if (idPosition > 0)
                {
                  state = DEFLINE_DEFINITION;
                  defStr[defPosition] = ch;
                  defPosition++;
                }
              else
                continue;
            }
          else if (ch == '[')
            {
              state = DEFLINE_DEFINITION;
              defStr[defPosition] = ch;
              defPosition++;
            }
          else
            {
              errPtr = Ali_AddError (errorListPtr, ERR_INVALID_DEFLINE,
                                     lineStr, (Int4) ch);
              errPtr->rowNum = rowNum;
              MemFree(defStr);
              MemFree(idStr);
              return NULL;
            }
          break;
        case DEFLINE_DEFINITION :
          defStr[defPosition] = ch;
          defPosition++;
          break;
        default:
          break;
        }
    }

  /* Check for blank line */

  if (state == DEFLINE_PRE_DATA)
    {
      MemFree(defStr);
      MemFree(idStr);
      return NULL;
    }

  idStr[idPosition]   = '\0';
  defStr[defPosition] = '\0';

  /* Make sure that it has at least one */
  /* set of square brackets.            */

  if ((StringChr(defStr,'[') == NULL) ||
      (StringChr(defStr,']') == NULL))
    {
      errPtr = Ali_AddError (errorListPtr, ERR_DEFLINE_NODEFS, lineStr);
      errPtr->rowNum = rowNum;
    }

  /* If we made it to here, then   */
  /* it's a valid definition line. */

  defLinePtr = (DefLineInfoPtr) MemNew (sizeof (DefLineInfo));
  if (defLinePtr == NULL)
    {
      MemFree(defStr);
      MemFree(idStr);
      return NULL;
    }

  /* An empty buffer is not handed to the result, so it must be */
  /* released here or it is leaked.                             */

  if (StringLen (defStr) != 0)
    defLinePtr->definitions = defStr;
  else
    {
      defLinePtr->definitions = NULL;
      MemFree(defStr);
    }

  if (StringLen (idStr) != 0)
    defLinePtr->id = idStr;
  else
    {
      defLinePtr->id = NULL;
      MemFree(idStr);
    }

  return defLinePtr;
}
/*************************************************************************** * CkQualSeqaa: * -- format (seq:"codon-sequence", aa:amino_acid) * -- example /codon=(seq:"ttt",aa:Leu) * /codon=(seq: "ttt", aa: Leu ) * 6-29-93 ***************************************************************************/ NLM_EXTERN int CkQualSeqaa (GBQualPtr PNTR head_gbqp, GBQualPtr gbqp, GBQualPtr preq, Boolean error_msgs, Boolean perform_corrections) { CharPtr eptr, str, aa; int retval = GB_FEAT_ERR_NONE; str = gbqp->val; if (StringNICmp(str, "(seq:", 5) == 0) { str += 5; while (*str == ' ') ++str; if ((eptr = StringChr(str, ',')) != NULL) { while (str != eptr) str++; while (*str != '\0' && (*str == ',' || *str == ' ')) str++; if (StringNICmp(str, "aa:", 3) == 0) { str += 3; while (*str == ' ') ++str; if ((eptr = StringChr(str, ')')) != NULL) { aa = TextSave(str, eptr-str); retval = CkQualPosSeqaa(head_gbqp, gbqp, preq, error_msgs, perform_corrections, aa, eptr); } } /* if, aa: */ else{ if (error_msgs){ ErrPostEx(SEV_ERROR, ERR_QUALIFIER_AA, "Missing aa: /%s=%s",gbqp->qual,gbqp->val); } retval = GB_FEAT_ERR_DROP; if (perform_corrections){ DeleteGBQualFromList(head_gbqp, gbqp, preq); } } }else{ if (error_msgs){ ErrPostEx(SEV_ERROR, ERR_QUALIFIER_SeqPosComma, "Missing \',\' /%s=%s",gbqp->qual,gbqp->val); /* ) match */ } retval = GB_FEAT_ERR_DROP; if (perform_corrections){ DeleteGBQualFromList(head_gbqp, gbqp, preq); } } } /* if, (seq: */ else { if (error_msgs){ ErrPostEx(SEV_ERROR, ERR_QUALIFIER_Seq, "Missing (seq: /%s=%s",gbqp->qual,gbqp->val); /* ) match */ } retval = GB_FEAT_ERR_DROP; if (perform_corrections){ DeleteGBQualFromList(head_gbqp, gbqp, preq); } } return retval; } /* CkQualSeqaa */
/*************************************************************************** * CkQualPosaa: * * -- format (pos:base_range, aa:amino_acid) * -- example /anticodon=(pos:34..36,aa:Phe) * /anticodon=(pos: 34..36, aa: Phe) * 10-12-93 ****************************************************************************/ NLM_EXTERN int CkQualPosaa(GBQualPtr PNTR head_gbqp, GBQualPtr gbqp, GBQualPtr preq, Boolean error_msgs, Boolean perform_corrections) { CharPtr eptr, str, aa = NULL; int retval = GB_FEAT_ERR_NONE; str = gbqp->val; if (StringNICmp(str, "(pos:", 5) == 0) { str += 5; while (*str == ' ') ++str; /*---I expect that we maight need to allow blanks here, but not now... -Karl 1/28/94 */ if ((eptr = StringChr(str, ',')) != NULL) { while (str != eptr && (IS_DIGIT(*str) || *str == '.')) str++; if (str == eptr) { while (*str != '\0' && (*str == ',' || *str == ' ')) str++; if (StringNICmp(str, "aa:", 3) == 0) { str += 3; while (*str == ' ') ++str; if ((eptr = StringChr(str, ')')) != NULL) { aa = TextSave(str, eptr-str); retval = CkQualPosSeqaa(head_gbqp, gbqp, preq, error_msgs, perform_corrections, aa, eptr); } } /* if, aa: */ else{ if (error_msgs){ ErrPostEx(SEV_ERROR, ERR_QUALIFIER_AA, "Missing aa: /%s=%s",gbqp->qual,gbqp->val); } retval = GB_FEAT_ERR_DROP; if (perform_corrections){ DeleteGBQualFromList(head_gbqp, gbqp, preq); } } } }else{ if (error_msgs){ ErrPostEx(SEV_ERROR, ERR_QUALIFIER_SeqPosComma, "Missing \',\' /%s=%s",gbqp->qual,gbqp->val); /* ) match */ } retval = GB_FEAT_ERR_DROP; if (perform_corrections){ DeleteGBQualFromList(head_gbqp, gbqp, preq); } } } /* if, (pos: */ else{ if (error_msgs){ ErrPostEx(SEV_ERROR, ERR_QUALIFIER_Pos, "Missing (pos: /%s=%s",gbqp->qual,gbqp->val); /* ) match */ } retval = GB_FEAT_ERR_DROP; if (perform_corrections){ DeleteGBQualFromList(head_gbqp, gbqp, preq); } } return retval; } /* CkQualPosaa */
/*
 * ProcessAccession
 *
 * Resolves `accn` to a GI, fetches the record with PubSeqSynchronousQuery
 * and formats it through DoSeqEntryToGnbk (GenBank format when do_nuc is
 * set, GenPept format when do_prot is set).
 *
 * accn is treated as a GI number when every character is a digit, otherwise
 * as an accession (optionally with version).
 *
 * When only_new is set the record is skipped if:
 *   - numeric input: the accession.version text for the GI, with its
 *     version stripped, resolves back to the same GI; or
 *   - accession input: the accession.version text for the resolved GI
 *     equals accn (case-insensitive).
 *
 * get_var selects flags = 1 for the query (0 otherwise).
 */
static void ProcessAccession ( CharPtr accn, XtraPtr extra, Boolean only_new, Boolean get_var, Boolean do_nuc, Boolean do_prot )
{
  Char         buf [41];
  CharPtr      cp;
  Int4         flags;
  Int4         gi = 0;
  Int4         latest = 0;
  Boolean      all_digits = TRUE;
  long         num;
  SeqEntryPtr  sep;
  SeqIdPtr     sip;

  for (cp = accn; *cp != '\0'; cp++) {
    if (! IS_DIGIT (*cp)) {
      all_digits = FALSE;
      break;
    }
  }

  if (all_digits) {
    if (sscanf (accn, "%ld", &num) == 1) {
      gi = (Int4) num;
      if (gi < 1) return;
      if (only_new) {
        sip = GetSeqIdForGI (gi);
        if (sip != NULL) {
          SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf));
          SeqIdFree (sip);
          cp = StringChr (buf, '.');
          if (cp != NULL) {
            /* drop the version and look the bare accession up again */
            *cp = '\0';
            sip = SeqIdFromAccessionDotVersion (buf);
            latest = GetGIForSeqId (sip);
            SeqIdFree (sip);
            if (latest == gi) return;
          }
        }
      }
    }
  } else {
    sip = SeqIdFromAccessionDotVersion (accn);
    gi = GetGIForSeqId (sip);
    SeqIdFree (sip);
    if (only_new) {
      sip = GetSeqIdForGI (gi);
      if (sip != NULL) {
        SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf));
        SeqIdFree (sip);
        if (StringICmp (accn, buf) == 0) return;
      }
    }
  }

  if (gi < 1) return;

  flags = get_var ? 1 : 0;

  sep = PubSeqSynchronousQuery (gi, 0, flags);
  if (sep == NULL) return;

  if (do_nuc) {
    DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
  }
  if (do_prot) {
    DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
  }

  SeqEntryFree (sep);
}
/*
 * ProcessSingleRecord
 *
 * Reads one input file in the format selected by cfp->type, optionally
 * rewrites it as ASN.1, then runs DoProcess on it, logging elapsed times
 * (in seconds, from GetSecs) to cfp->logfp when that is non-NULL.
 *
 *   cfp->type 1, 6 : read with ReadAsnFastaOrFlatFile
 *   cfp->type 2..5 : ASN.1 Seq-entry / Bioseq / Bioseq-set / Seq-submit
 *   cfp->type 7    : read every line through FileCache and return
 *
 * cfp->io flags used here: 'r' repeats the read cfp->maxcount times,
 * 'w' writes the entry cfp->maxcount times, 'b' makes that write binary.
 * DoProcess is always run cfp->maxcount times.
 */
static void ProcessSingleRecord ( CharPtr filename, CSpeedFlagPtr cfp )
{
  AsnIoPtr aip;
  BioseqPtr bsp;
  ValNodePtr bsplist = NULL;
  BioseqSetPtr bssp;
  Pointer dataptr = NULL;
  Uint2 datatype, entityID = 0;
  FileCache fc;
  FILE *fp;
  Int1 iotype;
  Char line [512];
  Int4 maxio = 1;
  SeqEntryPtr sep;
  time_t starttime, stoptime, worsttime;
  CharPtr str;
  Int4 x;

  if (cfp == NULL) return;
  if (StringHasNoText (filename)) return;

  /* read once unless the 'r' flag asks for repeated reads */
  if (StringChr (cfp->io, 'r') != NULL) {
    maxio = cfp->maxcount;
  }

  starttime = GetSecs ();

  for (x = 0; x < maxio; x++) {
    /* release the entity left over from the previous iteration */
    if (entityID != 0) {
      ObjMgrFreeByEntityID (entityID);
      entityID = 0;
      dataptr = NULL;
    }

    if (cfp->type == 1) {
      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }
      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
      FileClose (fp);
      entityID = ObjMgrRegister (datatype, dataptr);
    } else if (cfp->type >= 2 && cfp->type <= 5) {
      aip = AsnIoOpen (filename, cfp->binary? "rb" : "r");
      if (aip == NULL) {
        Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", filename);
        return;
      }
      switch (cfp->type) {
        case 2 :
          dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
          datatype = OBJ_SEQENTRY;
          break;
        case 3 :
          dataptr = (Pointer) BioseqAsnRead (aip, NULL);
          datatype = OBJ_BIOSEQ;
          break;
        case 4 :
          dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
          datatype = OBJ_BIOSEQSET;
          break;
        case 5 :
          dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
          datatype = OBJ_SEQSUB;
          break;
        default :
          break;
      }
      AsnIoClose (aip);
      entityID = ObjMgrRegister (datatype, dataptr);
    } else if (cfp->type == 6) {
      /* same read path as type 1 */
      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }
      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
      FileClose (fp);
      entityID = ObjMgrRegister (datatype, dataptr);
    } else if (cfp->type == 7) {
      /* line-reading only: consume the file and return on the first pass */
      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }
      FileCacheSetup (&fc, fp);
      str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
      while (str != NULL) {
        str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
      }
      FileClose (fp);
      return;
    } else {
      Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) cfp->type);
      return;
    }
  }

  /* also reached when the read loop ran zero times (entityID still 0) */
  if (entityID < 1 || dataptr == NULL) {
    Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
    return;
  }

  if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
        datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {

    stoptime = GetSecs ();
    worsttime = stoptime - starttime;
    if (cfp->logfp != NULL) {
      fprintf (cfp->logfp, "ASN reading time %ld seconds\n", (long) worsttime);
      fflush (cfp->logfp);
    }

    sep = GetTopSeqEntryForEntityID (entityID);

    if (sep == NULL) {
      /* no top Seq-entry yet: wrap a bare Bioseq / Bioseq-set in a new one,
         then ask the object manager again */
      sep = SeqEntryNew ();
      if (sep != NULL) {
        if (datatype == OBJ_BIOSEQ) {
          bsp = (BioseqPtr) dataptr;
          sep->choice = 1;
          sep->data.ptrvalue = bsp;
          SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
        } else if (datatype == OBJ_BIOSEQSET) {
          bssp = (BioseqSetPtr) dataptr;
          sep->choice = 2;
          sep->data.ptrvalue = bssp;
          SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
        } else {
          sep = SeqEntryFree (sep);
        }
      }
      sep = GetTopSeqEntryForEntityID (entityID);
    }

    if (sep != NULL) {

      if (cfp->lock) {
        starttime = GetSecs ();

        bsplist = LockFarComponents (sep);

        stoptime = GetSecs ();
        worsttime = stoptime - starttime;
        if (cfp->logfp != NULL) {
          fprintf (cfp->logfp, "Far component locking time %ld seconds\n", (long) worsttime);
          fflush (cfp->logfp);
        }
      }

      if (StringChr (cfp->io, 'w') != NULL) {
        starttime = GetSecs ();

        iotype = ASNIO_TEXT_OUT;
        if (StringChr (cfp->io, 'b') != NULL) {
          iotype = ASNIO_BIN_OUT;
        }

        for (x = 0; x < cfp->maxcount; x++) {
          aip = AsnIoNew (iotype, cfp->ofp, NULL, NULL, NULL);
          if (aip != NULL) {
            SeqEntryAsnWrite (sep, aip, NULL);
            AsnIoFree (aip, FALSE);
          }
        }

        stoptime = GetSecs ();
        worsttime = stoptime - starttime;
        if (cfp->logfp != NULL) {
          fprintf (cfp->logfp, "ASN writing time %ld seconds\n", (long) worsttime);
          fflush (cfp->logfp);
        }
      }

      starttime = GetSecs ();

      for (x = 0; x < cfp->maxcount; x++) {
        DoProcess (sep, entityID, cfp);
      }

      stoptime = GetSecs ();
      worsttime = stoptime - starttime;
      if (cfp->logfp != NULL) {
        fprintf (cfp->logfp, "Internal processing time %ld seconds\n", (long) worsttime);
        fflush (cfp->logfp);
      }

      /* NOTE(review): the entity is freed only on this path; when sep is
         still NULL above, or the datatype is unrecognized, it stays
         registered - confirm whether that is intended. */
      ObjMgrFreeByEntityID (entityID);

      bsplist = UnlockFarComponents (bsplist);
    }

  } else {

    Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
  }
}
/*
 * DoProcess
 *
 * Runs the operations selected by the flag strings in cfp against one
 * Seq-entry.  Each flag string is scanned for single-letter options:
 *
 *   cfp->clean  : t (delete marked titles)  a (AssignIDsInEntity)
 *                 b (BasicSeqEntryCleanup)  s (SeriousSeqEntryCleanup)
 *   cfp->index  : f (SeqMgrIndexFeatures)
 *   cfp->seq    : c C s S r d D T x X (per-Bioseq visitors)
 *                 f t (per-feature visitors)
 *   cfp->feat   : v g h t s S c   (x, o, d are accepted but do nothing)
 *   cfp->desc   : b, t are accepted but do nothing
 *   cfp->verify : v (ValidateSeqEntry)  b (GenBank flat file)
 *
 * Output goes to cfp->ofp, which is flushed at the end when non-NULL.
 * The direct fprintf calls in the 's' / 'S' feature branches are skipped
 * when cfp->ofp is NULL, matching the NULL checks used for ofp elsewhere.
 */
static void DoProcess ( SeqEntryPtr sep, Uint2 entityID, CSpeedFlagPtr cfp )
{
  Char            id [64];
  ErrSev          oldErrSev;
  ValidStructPtr  vsp;

  if (sep == NULL || cfp == NULL) return;

  /* ---- cleanup ---- */

  if (StringChr (cfp->clean, 't') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'a') != NULL) {
    AssignIDsInEntity (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'b') != NULL) {
    BasicSeqEntryCleanup (sep);
  }
  if (StringChr (cfp->clean, 's') != NULL) {
    SeriousSeqEntryCleanup (sep, NULL, NULL);
  }

  /* ---- indexing ---- */

  if (StringChr (cfp->index, 'f') != NULL) {
    SeqMgrIndexFeatures (entityID, 0);
  }

  /* ---- sequence operations ---- */

  if (StringChr (cfp->seq, 'c') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaExist);
  }
  if (StringChr (cfp->seq, 'C') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRegen);
  }
  if (StringChr (cfp->seq, 's') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'S') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'r') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRaw);
  }
  if (StringChr (cfp->seq, 'd') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'D') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'T') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'x') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  if (StringChr (cfp->seq, 'X') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  if (StringChr (cfp->seq, 'f') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaFeat);
  }
  if (StringChr (cfp->seq, 't') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaTrans);
  }

  /* ---- feature operations ---- */

  if (StringChr (cfp->feat, 'v') != NULL) {
    VisitFeaturesInSep (sep, NULL, DoVisitFeaturesTest);
  }
  if (StringChr (cfp->feat, 'g') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapPrintTest);
  }
  if (StringChr (cfp->feat, 'h') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapSpeedTest);
  }
  if (StringChr (cfp->feat, 'x') != NULL) {
    /* accepted, no action */
  }
  if (StringChr (cfp->feat, 'o') != NULL) {
    /* accepted, no action */
  }
  if (StringChr (cfp->feat, 'd') != NULL) {
    /* accepted, no action */
  }
  if (StringChr (cfp->feat, 't') != NULL) {
    SeqEntryToGnbk (sep, NULL, FTABLE_FMT, SEQUIN_MODE, NORMAL_STYLE,
                    0, 0, SHOW_PROT_FTABLE, NULL, cfp->ofp);
  }
  if (StringChr (cfp->feat, 's') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      id [0] = '\0';
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      /* the nucleotide context is only valid during the visit */
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'S') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      SetBatchSuggestNucleotide (cfp->nucbsp, cfp->genCode);
      id [0] = '\0';
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      ClearBatchSuggestNucleotide ();
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'c') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoVisitCodingRegions);
  }

  /* ---- descriptor operations ---- */

  if (StringChr (cfp->desc, 'b') != NULL) {
    /* accepted, no action */
  }
  if (StringChr (cfp->desc, 't') != NULL) {
    /* accepted, no action */
  }

  /* ---- verification ---- */

  if (StringChr (cfp->verify, 'v') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    vsp = ValidStructNew ();
    if (vsp != NULL) {
      vsp->useSeqMgrIndexes = TRUE;
      vsp->suppressContext = TRUE;
      vsp->seqSubmitParent = TRUE;
      vsp->testLatLonSubregion = TRUE;
      /* message level is lowered for the validation run and restored after */
      oldErrSev = ErrSetMessageLevel (SEV_NONE);
      vsp->errfunc = ValidCallback;
      vsp->userdata = (Pointer) cfp->ofp;
      ValidateSeqEntry (sep, vsp);
      ValidStructFree (vsp);
      ErrSetMessageLevel (oldErrSev);
    }
  }
  if (StringChr (cfp->verify, 'b') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE,
                    0, 0, 0, NULL, cfp->ofp);
  }

  if (cfp->ofp != NULL) {
    fflush (cfp->ofp);
  }
}
/*
 * Ali_SeqLineGetType
 *
 * Guesses whether a line of sequence data is protein or nucleotide.
 *
 * Returns ALI_PROTEIN as soon as a character from the protein-only set
 * (E F I L P Q Z in either case, or '*') is seen.  'U'/'u' are
 * intentionally not in that set.  Otherwise the line is scored:
 *   - "common nucleotide" characters: A T C G N X (either case);
 *   - "misc" characters: '-', ' ', and any character found in the
 *     configured gapChar, missingChar and (if non-NULL) unalignedChar
 *     strings.
 * If more than 80% of the line is misc characters, or the line is empty,
 * the result is ALI_AMBIGUOUS.  Otherwise the percentage of nucleotide plus
 * misc characters is compared with configPtr->nuclLineMaxThreshold
 * (above -> ALI_NUCLEOTIDE) and configPtr->nuclLineMinThreshold
 * (below -> ALI_PROTEIN); in between the result is ALI_AMBIGUOUS.
 *
 * The configured strings are tested directly rather than being formatted
 * into a small fixed-size buffer, which could be overrun.
 */
Int2 Ali_SeqLineGetType(CharPtr seqLine, AliConfigInfoPtr configPtr)
{
  Int4     position;
  Int4     nuclCount = 0;
  Int4     miscCount = 0;
  Int4     lineLen;
  FloatLo  percentNucl;
  FloatLo  percentMisc;
  Char     ch;

  for (position = 0; seqLine[position] != '\0'; position++)
    {
      ch = seqLine[position];

      /* Is it definitely a protein sequence? */
      /* The following chars are only in      */
      /* protein sequences.                   */

      if (StringChr ("EeFfIiLlPpQqZz*", ch) != NULL)
        return ALI_PROTEIN;

      /* All others are technically ambiguous, so */
      /* count common nucleotides and filler.     */

      if (StringChr ("ATCGNXatcgnx", ch) != NULL)
        nuclCount++;
      else if ((ch == '-') || (ch == ' ') ||
               ((configPtr->gapChar != NULL) &&
                (StringChr (configPtr->gapChar, ch) != NULL)) ||
               ((configPtr->missingChar != NULL) &&
                (StringChr (configPtr->missingChar, ch) != NULL)) ||
               ((configPtr->unalignedChar != NULL) &&
                (StringChr (configPtr->unalignedChar, ch) != NULL)))
        miscCount++;
    }

  /* position now equals the length of the line */

  lineLen = position;

  /* An empty line carries no information (and */
  /* would otherwise divide zero by zero).     */

  if (lineLen == 0)
    return ALI_AMBIGUOUS;

  /* If we have a high percentage of misc chars then */
  /* we don't have enough data to make a decision.   */

  percentMisc = ((FloatLo) miscCount) / ((FloatLo) lineLen);
  if ((percentMisc * 100) > 80)
    return ALI_AMBIGUOUS;

  /* Else, if a high percentage are common nucleotide */
  /* characters then it is a nucleotide line.         */

  percentNucl = ((FloatLo) nuclCount + (FloatLo) miscCount) /
    (FloatLo) lineLen;

  if ((percentNucl * 100) > configPtr->nuclLineMaxThreshold)
    return ALI_NUCLEOTIDE;
  else if ((percentNucl * 100) < configPtr->nuclLineMinThreshold)
    return ALI_PROTEIN;

  /* If we haven't come to a conclusion */
  /* then say so.                       */

  return ALI_AMBIGUOUS;
}
/***************************************************************************
*  SplitMultiValQual:
*
*  Walks the qualifier list at *gbqp.  For each qualifier that GBQualSplit
*  accepts (result other than -1) and whose value has the form "(...)":
*    - with no comma inside, the surrounding parentheses are stripped in
*      place;
*    - with commas inside, a warning is posted and the qualifier is split
*      into one qualifier per comma-separated item, each with a copy of the
*      original qualifier name, linked in place of the original.
*
*  error_msgs and perform_corrections are not consulted by this function.
*  Always returns GB_FEAT_ERR_NONE.
****************************************************************************/
NLM_EXTERN int SplitMultiValQual(GBQualPtr PNTR gbqp, Boolean error_msgs, Boolean perform_corrections)
{
  Int2       val;
  GBQualPtr  next_q, curq, preq = NULL, first_q, tmp;
  int        retval = GB_FEAT_ERR_NONE;
  CharPtr    bptr, ptr, buf;

  for (first_q = curq = *gbqp; curq != NULL; curq = next_q) {
    next_q = curq->next;          /* in case deleted */

    val = GBQualSplit(curq->qual);
    if (val == -1) {
      preq = curq;
      continue;
    }
    bptr = curq->val;
    if (bptr == NULL) {
      preq = curq;
      continue;
    }
    if (*bptr != '(') {
      preq = curq;
      continue;
    }
    if (*(bptr+StringLen(bptr)-1) != ')') {
      preq = curq;
      continue;
    }

    /* drop the closing parenthesis */
    *(bptr+StringLen(bptr)-1) = '\0';

    if ((ptr = StringChr(bptr, ',')) == NULL) {
      /* Single value: strip the opening parenthesis by shifting the text
         down one position.  Done by hand because source and destination
         overlap, which a strcpy-style copy does not permit. */
      ptr = bptr;
      while (*(ptr + 1) != '\0') {
        *ptr = *(ptr + 1);
        ptr++;
      }
      *ptr = '\0';
      preq = curq;
      continue;
    }

    ErrPostEx(SEV_WARNING, ERR_QUALIFIER_MultiValue,
              "Splited qualifier %s", curq->qual);

    /* first item replaces the value of the existing qualifier */
    buf = bptr;
    bptr++;
    curq->val = TextSave(bptr, ptr-bptr);
    bptr = ptr + 1;
    curq->next = NULL;

    /* middle items */
    while ((ptr = StringChr(bptr, ',')) != NULL) {
      tmp = GBQualNew();
      tmp->qual = StringSave(curq->qual);
      tmp->val = TextSave(bptr, ptr-bptr);
      curq = tie_qual(curq, tmp);
      bptr = ptr + 1;
    }

    /* last item; reconnect the remainder of the original list after it */
    tmp = GBQualNew();
    tmp->qual = StringSave(curq->qual);
    tmp->val = StringSave(bptr);
    curq = tie_qual(curq, tmp);
    tmp->next = next_q;
    curq = tmp;

    MemFree(buf);                 /* the original "(a,b,...)" text */

    if (preq) {
      /*---- we have retained a qualifier, previously ----*/
      if (preq->next != next_q) {
        /*-- did not delete curq ----*/
        preq = curq;
      }
    } else {
      /* ---- no qualifier previously retained, is there a new head pointer? */
      if (first_q == *gbqp) {
        /* ---- we have kept our first qualifier */
        preq = curq;
      } else {
        /*--- we deleted the head of the queue, record current first qualifier */
        first_q = *gbqp;
      }
    }
    if (*gbqp == NULL) {
      break;                      /* was one, is gone */
    }
  }

  return retval;

} /* SplitMultiValQual */
/*
 * Get_Sequence
 *
 * Reads FASTA-like text from f into seqrp until ReadNextLine fails or a line
 * beginning with '&' or '!' is seen.
 *
 *   - A line beginning with '>' closes the current segment (if it has any
 *     residues) and appends a new IdRec whose accn is the text after '>' up
 *     to the first space.
 *   - Any other non-empty line contributes its letters, upper-cased.  For
 *     nucleotides (seqrp->nuc) 'U' becomes 'T' and only A, C, G, N, T are
 *     kept; otherwise every letter except J, O and U is kept.
 *
 * Closing a segment appends '*' when seqrp->lookForStop is set and records
 * the segment length in seqrp->segLens.  After the last line, nucleotide
 * input gets three trailing 'N' residues before the final segment is
 * closed.  On return seqrp->sequence / seqrp->length describe the merged
 * data, or are NULL / 0 when nothing was read.
 */
static void Get_Sequence (FILE *f, SeqRecPtr seqrp)
{
  Char      line [256];
  CharPtr   cp;
  Char      residue;
  IdRecPtr  node;
  IdRecPtr  tail;
  Int4      segLen = 0;
  Int4      totalLen = 0;
  Boolean   done = FALSE;

  if (f == NULL || seqrp == NULL) return;

  if (seqrp->rawSeq == NULL) {
    seqrp->rawSeq = BSNew (1000);
  }
  if (seqrp->segLens == NULL) {
    seqrp->segLens = BSNew (10);
  }

  while (! done && ReadNextLine (f, line, sizeof (line))) {
    switch (line [0]) {

      case '&' :
      case '!' :
        done = TRUE;
        break;

      case '>' :
        /* close the segment collected so far */
        if (segLen > 0) {
          if (seqrp->lookForStop) {
            BSPutByte (seqrp->rawSeq, (Int2) '*');
            segLen++;
          }
          BSWrite (seqrp->segLens, &segLen, sizeof (Int4));
          totalLen += segLen;
          segLen = 0;
        }

        /* the ID is the text between '>' and the first space */
        cp = StringChr (line, ' ');
        if (cp != NULL) {
          *cp = '\0';
        }

        node = MemNew (sizeof (IdRec));
        if (node != NULL) {
          node->id.accn = StringSave (line + 1);
        }
        if (seqrp->ids == NULL) {
          seqrp->ids = node;
        } else {
          for (tail = seqrp->ids; tail->next != NULL; tail = tail->next) {
            continue;
          }
          tail->next = node;
        }
        break;

      case '\0' :
        break;

      default :
        for (cp = line; *cp != '\0'; cp++) {
          residue = TO_UPPER (*cp);
          if (residue < 'A' || residue > 'Z') continue;
          if (seqrp->nuc) {
            if (residue == 'U') {
              residue = 'T';
            }
            if (StringChr ("BDEFHIJKLMOPQRSUVWXYZ", residue) == NULL) {
              BSPutByte (seqrp->rawSeq, (Int2) residue);
              segLen++;
            }
          } else if (StringChr ("JOU", residue) == NULL) {
            BSPutByte (seqrp->rawSeq, (Int2) residue);
            segLen++;
          }
        }
        break;
    }
  }

  /* nucleotide input is always padded with three N residues */
  if (seqrp->nuc) {
    BSPutByte (seqrp->rawSeq, (Int2) 'N');
    BSPutByte (seqrp->rawSeq, (Int2) 'N');
    BSPutByte (seqrp->rawSeq, (Int2) 'N');
    segLen += 3;
  }

  /* close the final segment */
  if (segLen > 0) {
    if (seqrp->lookForStop) {
      BSPutByte (seqrp->rawSeq, (Int2) '*');
      segLen++;
    }
    BSWrite (seqrp->segLens, &segLen, sizeof (Int4));
    totalLen += segLen;
    segLen = 0;
  }

  if (totalLen > 0) {
    seqrp->sequence = BSMerge (seqrp->rawSeq, NULL);
    seqrp->length = totalLen;
  } else {
    seqrp->sequence = NULL;
    seqrp->length = 0;
  }
}