Ejemplo n.º 1
0
/*
 * SmartGuessMakeId
 *
 * Builds a SeqId from free-form identifier text by guessing which database
 * tag it belongs to:
 *   - text that already contains '|' is passed to MakeSeqID unchanged;
 *   - text made only of digits is tagged as "gi|<text>";
 *   - text containing '_' is tagged as "oth|<text>";
 *   - anything else is tagged as "gb|<text>".
 *
 * Returns NULL when str has no text; otherwise returns the result of
 * MakeSeqID on the (possibly tagged) string.
 */
static SeqIdPtr SmartGuessMakeId (CharPtr str)
{
  CharPtr  tagged;
  SeqIdPtr result;

  if (StringHasNoText (str)) {
    return NULL;
  }

  /* Already in tagged form: nothing to guess. */
  if (StringChr (str, '|') != NULL) {
    return MakeSeqID (str);
  }

  /* Each buffer holds the tag, the text and the terminating NUL. */
  if (IsAllDigits (str)) {
    tagged = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
    sprintf (tagged, "gi|%s", str);
  } else if (StringChr (str, '_') != NULL) {
    tagged = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 5));
    sprintf (tagged, "oth|%s", str);
  } else {
    tagged = (CharPtr) MemNew (sizeof (Char) * (StringLen (str) + 4));
    sprintf (tagged, "gb|%s", str);
  }

  result = MakeSeqID (tagged);
  tagged = MemFree (tagged);
  return result;
}
Ejemplo n.º 2
0
/*
 * IsProteinChar
 *
 * Reports whether ch is one of the characters accepted as a protein residue
 * symbol: the letters A-Z except J and O (in either case), plus '*'.
 *
 * The NUL character is rejected explicitly.  Assuming StringChr follows
 * strchr semantics, searching for '\0' would match the terminator of the
 * alphabet string and classify the end-of-string marker as a residue.
 *
 * Returns TRUE when ch belongs to the alphabet, FALSE otherwise.
 */
Boolean IsProteinChar (Char ch)
{
  if (ch == '\0')
    return FALSE;

  if (StringChr("ABCDEFGHIKLMNPQRSTUVWXYZ*abcdefghiklmnpqrstuvwxyz",ch) != NULL)
    return TRUE;
  else
    return FALSE;
}
Ejemplo n.º 3
0
/*
 * IsNucleotideChar
 *
 * Reports whether ch is one of the characters accepted as a nucleotide
 * symbol: a, b, c, d, g, h, k, m, n, r, s, t, u, v, w, x, y in either case.
 *
 * The NUL character is rejected explicitly.  Assuming StringChr follows
 * strchr semantics, searching for '\0' would match the terminator of the
 * alphabet string and classify the end-of-string marker as a nucleotide.
 *
 * Returns TRUE when ch belongs to the alphabet, FALSE otherwise.
 */
Boolean IsNucleotideChar (Char ch)
{
  if (ch == '\0')
    return FALSE;

  if (StringChr("abcdghkmnrstuvwxyABCDGHKMNRSTUVWXY",ch) != NULL)
    return TRUE;
  else
    return FALSE;
}
Ejemplo n.º 4
0
/*
 * IsValidIdChar
 *
 * Classifies a character that may appear in a sequence identifier.
 *
 * Returns:
 *   ID_GOOD_CHAR_LETTER  for A-Z and a-z,
 *   ID_GOOD_CHAR_NUMBER  for 0-9,
 *   ID_GOOD_CHAR_OTHER   for one of  " . _ - |
 *   FALSE                for anything else, including the NUL character.
 *
 * NUL is rejected up front.  Assuming StringChr follows strchr semantics,
 * searching for '\0' would match the terminator of the first alphabet
 * string and report the end-of-string marker as a letter.
 */
Int2 IsValidIdChar (Char idChar)
{
  if (idChar == '\0')
    return FALSE;

  if (StringChr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",idChar) != NULL)
    return ID_GOOD_CHAR_LETTER;

  if (StringChr("abcdefghijklmnopqrstuvwxyz",idChar) != NULL)
    return ID_GOOD_CHAR_LETTER;

  if (StringChr("0123456789",idChar) != NULL)
    return ID_GOOD_CHAR_NUMBER;

  if (StringChr("\"._-|",idChar) != NULL)
    return ID_GOOD_CHAR_OTHER;

  return FALSE;
}
Ejemplo n.º 5
0
/*
 * ExciseProteinIDLine
 *
 * Removes, in place, the first line of the form
 *     "\n\t\t\tprotein_id\t..."
 * from the text in 'line'.  The text that follows the removed line
 * (starting at its terminating newline) is shifted down over it.
 *
 * Nothing is changed when 'line' has no text, when no protein_id line is
 * present, or when the protein_id line is not followed by a newline.
 */
static void ExciseProteinIDLine (CharPtr line)
{
  CharPtr dst;
  CharPtr src;

  if (StringHasNoText (line)) return;

  /* dst: the newline that introduces the protein_id line */
  dst = StringStr (line, "\n\t\t\tprotein_id\t");
  if (dst == NULL) return;

  /* src: the newline that ends the protein_id line */
  src = StringChr (dst + 1, '\n');
  if (src == NULL) return;

  /* Slide the tail (terminator included) down over the excised line. */
  while ((*dst++ = *src++) != 0) {
    /* copying is done in the loop condition */
  }
}
Ejemplo n.º 6
0
/*
 * WriteStdoutRamaOneModel
 *
 * Prints one line per entry of the list returned by Rama() for the given
 * model, in the form
 *     <molecule name>, <residue letter>, <residue number>, <phi>, <psi>
 *
 * pmsdRoot  structure whose AM_MGD nodes are collected with ConvertNode
 * ModelNum  model number handed to Rama()
 *
 * The residue letter is the first character of the residue's pcIUPAC
 * string when that character occurs in the NCBIstdaa alphabet, otherwise
 * an empty string is printed.  The molecule name is printed straight from
 * the pointer returned by ParentMolName (an empty string when that is
 * NULL) rather than being copied into a fixed-size local buffer, so a long
 * name cannot overrun the stack.
 */
void WriteStdoutRamaOneModel(PMSD pmsdRoot, Int2 ModelNum ){

ValNodePtr vnpRama = NULL;
PRS prsHead = NULL;
PRS prsHere = NULL;
PMGD pmgdAA = NULL;
CharPtr NCBIstdaaUC = "-ABCDEFGHIKLMNPQRSTVWXYZU*";
CharPtr chain;
CharPtr match;
Char aa[3];

vnpRama=ConvertNode((PFB)pmsdRoot,AM_MGD);
if (vnpRama==NULL) return;
prsHead=Rama(vnpRama,ModelNum);
for (prsHere=prsHead; prsHere != NULL; prsHere=prsHere->next) {
    pmgdAA = (PMGD) prsHere->pfbThis;

    /* one-letter code, or "" when the letter is not in the alphabet */
    match = StringChr(NCBIstdaaUC,pmgdAA->pcIUPAC[0]);
    aa[0] = (match != NULL) ? *match : '\0';
    aa[1] = '\0';

    chain = ParentMolName((PFB) pmgdAA);
    if (chain == NULL) chain = "";

    printf("%s, %s, %d, %f, %f\n", chain, aa,
           (int) (pmgdAA->pdnmgLink->choice),
           (float) prsHere->Phi, (float) prsHere->Psi);
}
freeRS(prsHead);
return;
}
Ejemplo n.º 7
0
/*
 * FieldsFromFieldListString
 *
 * Turns a comma-separated list of source qualifier names into a ValNode
 * list.  Each name is looked up with GetSourceQualTypeByName; recognized
 * names are appended to the result as FieldType_source_qual entries whose
 * payload is a SourceQualChoice_textqual node carrying the qualifier type.
 * Unrecognized names are reported through Message and skipped.
 *
 * Returns NULL when str has no text, otherwise the list that was built
 * (which may itself be NULL if no name was recognized).
 */
static ValNodePtr FieldsFromFieldListString (CharPtr str)
{
  CharPtr    buffer, token, sep, following;
  Int4       qual_type;
  ValNodePtr result = NULL, choice;

  if (StringHasNoText (str)) {
    return NULL;
  }

  /* Work on a private copy so tokens can be NUL-terminated in place. */
  buffer = StringSave (str);
  token = buffer;

  do {
    sep = StringChr (token, ',');
    if (sep == NULL) {
      following = NULL;          /* this is the last name in the list */
    } else {
      *sep = 0;
      following = sep + 1;
    }

    qual_type = GetSourceQualTypeByName (token);
    if (qual_type >= 0) {
      choice = ValNodeNew (NULL);
      choice->choice = SourceQualChoice_textqual;
      choice->data.intvalue = qual_type;
      ValNodeAddPointer (&result, FieldType_source_qual, choice);
    } else {
      Message (MSG_ERROR, "%s is not a recognized source field name", token);
    }

    token = following;
  } while (token != NULL);

  buffer = MemFree (buffer);
  return result;
}
Ejemplo n.º 8
0
/*
 * DoNewFastaDefline
 *
 * Bioseq callback that writes ">id title" for one Bioseq, where the title
 * comes from NewCreateDefLine.
 *
 * bsp       Bioseq to report; ignored when NULL
 * userdata  CSpeedFlagPtr with the run options; ignored when NULL
 *
 * Skipping rules taken from cfp->skip:
 *   's'  skip Bioseqs whose parent set is a segset or a parts set
 *   'v'  skip virtual Bioseqs
 *
 * A title with no text is replaced by "?".  Output goes to cfp->ofp and is
 * suppressed when that stream is NULL.
 */
static void DoNewFastaDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BioseqSetPtr   bssp;
  CSpeedFlagPtr  cfp;
  Char           id [128];
  CharPtr        title;

  if (bsp == NULL) return;
  cfp = (CSpeedFlagPtr) userdata;
  if (cfp == NULL) return;

  if (StringChr (cfp->skip, 's') != NULL) {
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
      if (bssp != NULL) {
        if (bssp->_class == BioseqseqSet_class_segset ||
            bssp->_class == BioseqseqSet_class_parts) return;
      }
    }
  }
  if (StringChr (cfp->skip, 'v') != NULL) {
    if (bsp->repr == Seq_repr_virtual) return;
  }

  id [0] = '\0';
  SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
  title = NewCreateDefLine (NULL, bsp, FALSE, FALSE);
  if (StringHasNoText (title)) {
    /* Release a blank (but allocated) title before substituting "?". */
    MemFree (title);
    title = StringSave ("?");
  }

  if (cfp->ofp != NULL) {
    fprintf (cfp->ofp, ">%s %s\n", id, title);
  }

  MemFree (title);
}
Ejemplo n.º 9
0
/*
 * s_OtherGetValue
 *
 * Extracts the value from a "name = value" string: the run of characters
 * that follows the first '=' (after skipping spaces) up to the next space
 * or the end of the string.
 *
 * Returns a newly allocated, NUL-terminated copy of the value, or NULL
 * when there is no '=', when nothing but spaces follows it, or when the
 * allocation fails.
 *
 * The buffer is sized from the measured value length, so a value longer
 * than ALI_MAX_LINE_LEN cannot overrun it.  It is never smaller than
 * ALI_MAX_LINE_LEN bytes, so existing values get the same allocation size
 * as before.
 */
CharPtr s_OtherGetValue (CharPtr otherStr)
{
  CharPtr valueStart;
  CharPtr valueEnd;
  CharPtr valueBuff;
  long    valueLen;
  long    buffSize;
  long    i;

  /* Go to the first character after the '=' */

  if ((valueStart = StringChr (otherStr, '=')) == NULL)
    return NULL;
  valueStart++;

  /* Skip spaces */

  while (*valueStart == ' ')
    valueStart++;

  if (*valueStart == '\0')
    return NULL;

  /* Measure the value before allocating */

  valueEnd = valueStart;
  while ((*valueEnd != ' ') && (*valueEnd != '\0'))
    valueEnd++;
  valueLen = (long) (valueEnd - valueStart);

  buffSize = (long) ALI_MAX_LINE_LEN;
  if (valueLen + 1 > buffSize)
    buffSize = valueLen + 1;

  valueBuff = (CharPtr) MemNew (buffSize);
  if (valueBuff == NULL)
    return NULL;

  /* Get the value */

  for (i = 0; i < valueLen; i++)
    valueBuff[i] = valueStart[i];
  valueBuff[valueLen] = '\0';

  /* Return successfully */

  return valueBuff;
}
Ejemplo n.º 10
0
/*
 * s_ParseDefLine
 *
 * Parses a definition line of the form
 *     >id definitions...
 * into a newly allocated DefLineInfo.
 *
 * lineStr       text of the line
 * rowNum        row number recorded on any error that is raised
 * errorListPtr  error list handed to Ali_AddError
 *
 * Parsing is a three-state scan:
 *   DEFLINE_PRE_DATA    leading white space; the first other character
 *                       must be '>' or the line is not a defline
 *   DEFLINE_SEQID       characters accepted by IsValidIdChar build the id;
 *                       white space after a non-empty id, or a '[',
 *                       starts the definition text; any other character
 *                       raises ERR_INVALID_DEFLINE
 *   DEFLINE_DEFINITION  everything else is copied to the definition text
 *
 * A definition text lacking '[' or ']' raises ERR_DEFLINE_NODEFS but the
 * line is still returned.
 *
 * Returns NULL when lineStr is NULL, blank, not a defline, contains an
 * invalid id character, or when memory cannot be allocated.  In the
 * returned structure 'id' and 'definitions' are NULL when the
 * corresponding text is empty; the scratch buffer for an empty field is
 * released here rather than leaked.
 */
static DefLineInfoPtr s_ParseDefLine (CharPtr lineStr,
				      Int4 rowNum,
				      ErrInfoPtr PNTR  errorListPtr)
{
  Char           ch;
  CharPtr        defStr;
  CharPtr        idStr;
  Int4           defPosition;
  Int4           idPosition;
  Int4           position;
  Int2           state;
  DefLineInfoPtr defLinePtr = NULL;
  ErrInfoPtr     errPtr;

  if (lineStr == NULL)
    return NULL;

  defPosition = 0;
  idPosition = 0;

  /* Neither part can be longer than the line itself */

  defStr = (CharPtr) MemNew (StringLen(lineStr)+1);
  idStr  = (CharPtr) MemNew (StringLen(lineStr)+1);
  if (defStr == NULL || idStr == NULL)
    {
      MemFree(defStr);
      MemFree(idStr);
      return NULL;
    }

  /* Parse the line character by character */

  state = DEFLINE_PRE_DATA;

  for (position = 0; lineStr[position] != '\0'; position++)
    {
      ch = lineStr[position];

      switch (state)
        {
        case DEFLINE_PRE_DATA :
          if (IS_WHITESP(ch))
            continue;
          else if (ch == '>')
            state = DEFLINE_SEQID;
          else
            {
              MemFree(defStr);
              MemFree(idStr);
              return NULL;  /* Not a defline */
            }
          break;
        case DEFLINE_SEQID :
          if (IsValidIdChar(ch))
            {
              idStr[idPosition] = ch;
              idPosition++;
            }
          else if (IS_WHITESP(ch))
            {
              /* White space before the id is skipped; */
              /* after the id it starts the definition. */
              if (idPosition > 0)
                {
                  state = DEFLINE_DEFINITION;
                  defStr[defPosition] = ch;
                  defPosition++;
                }
              else
                continue;
            }
          else if (ch == '[')
            {
              state = DEFLINE_DEFINITION;
              defStr[defPosition] = ch;
              defPosition++;
            }
          else
            {
              errPtr = Ali_AddError (errorListPtr, ERR_INVALID_DEFLINE,
                                     lineStr, (Int4) ch);
              if (errPtr != NULL)
                errPtr->rowNum = rowNum;
              MemFree(defStr);
              MemFree(idStr);
              return NULL;
            }
          break;
        case DEFLINE_DEFINITION :
          defStr[defPosition] = ch;
          defPosition++;
          break;
        default:
          break;
        }
    }

  /* Check for blank line */

  if (state == DEFLINE_PRE_DATA)
    {
      MemFree(defStr);
      MemFree(idStr);
      return NULL;
    }

  idStr[idPosition]   = '\0';
  defStr[defPosition] = '\0';

  /* Make sure that it has at least one */
  /* set of square brackets.            */

  if ((StringChr(defStr,'[') == NULL) || (StringChr(defStr,']') == NULL))
    {
      errPtr = Ali_AddError (errorListPtr, ERR_DEFLINE_NODEFS, lineStr);
      if (errPtr != NULL)
        errPtr->rowNum = rowNum;
    }

  /* If we made it to here, then */
  /* it's a valid definition line. */

  defLinePtr = (DefLineInfoPtr) MemNew (sizeof (DefLineInfo));
  if (defLinePtr == NULL)
    {
      MemFree(defStr);
      MemFree(idStr);
      return NULL;
    }

  /* Hand over non-empty buffers; release the empty ones */

  if (defPosition > 0)
    defLinePtr->definitions = defStr;
  else
    {
      defLinePtr->definitions = NULL;
      MemFree(defStr);
    }

  if (idPosition > 0)
    defLinePtr->id = idStr;
  else
    {
      defLinePtr->id = NULL;
      MemFree(idStr);
    }

  return defLinePtr;
}
Ejemplo n.º 11
0
/***************************************************************************
*  CkQualSeqaa:
*  -- format       (seq:"codon-sequence", aa:amino_acid)
*  -- example      /codon=(seq:"ttt",aa:Leu)
*                  /codon=(seq: "ttt", aa: Leu )
*
*  Checks the overall shape of the qualifier value:
*    1. it must start with "(seq:" (case-insensitive);
*    2. a ',' must follow;
*    3. "aa:" (case-insensitive) must come after the comma and any
*       further commas or blanks.
*  A failed check posts an error when error_msgs is set, deletes the
*  qualifier from the list when perform_corrections is set, and returns
*  GB_FEAT_ERR_DROP.
*
*  When the shape is right and a closing ')' is present, the amino acid
*  text is saved and the result of CkQualPosSeqaa is returned.  A value
*  with no closing ')' returns GB_FEAT_ERR_NONE without further checks.
***************************************************************************/
NLM_EXTERN int CkQualSeqaa (GBQualPtr PNTR head_gbqp, GBQualPtr gbqp, 
   GBQualPtr preq,
   Boolean error_msgs, Boolean perform_corrections)
{
   CharPtr  cursor, mark, aa;

   cursor = gbqp->val;

   /* 1. leading "(seq:" */
   if (StringNICmp(cursor, "(seq:", 5) != 0) {
      if (error_msgs) {
         ErrPostEx(SEV_ERROR, ERR_QUALIFIER_Seq,
            "Missing (seq: /%s=%s",gbqp->qual,gbqp->val);
/* ) match */
      }
      if (perform_corrections) {
         DeleteGBQualFromList(head_gbqp, gbqp, preq);
      }
      return GB_FEAT_ERR_DROP;
   }
   cursor += 5;
   while (*cursor == ' ') {
      cursor++;
   }

   /* 2. comma separating the sequence from the amino acid */
   mark = StringChr(cursor, ',');
   if (mark == NULL) {
      if (error_msgs) {
         ErrPostEx(SEV_ERROR, ERR_QUALIFIER_SeqPosComma,
            "Missing \',\' /%s=%s",gbqp->qual,gbqp->val);
/* ) match */
      }
      if (perform_corrections) {
         DeleteGBQualFromList(head_gbqp, gbqp, preq);
      }
      return GB_FEAT_ERR_DROP;
   }

   /* The sequence text itself is not examined; jump to the comma. */
   cursor = mark;
   while (*cursor == ',' || *cursor == ' ') {
      cursor++;
   }

   /* 3. "aa:" */
   if (StringNICmp(cursor, "aa:", 3) != 0) {
      if (error_msgs) {
         ErrPostEx(SEV_ERROR, ERR_QUALIFIER_AA,
            "Missing aa: /%s=%s",gbqp->qual,gbqp->val);
      }
      if (perform_corrections) {
         DeleteGBQualFromList(head_gbqp, gbqp, preq);
      }
      return GB_FEAT_ERR_DROP;
   }
   cursor += 3;
   while (*cursor == ' ') {
      cursor++;
   }

   mark = StringChr(cursor, ')');
   if (mark == NULL) {
      return GB_FEAT_ERR_NONE;
   }

   aa = TextSave(cursor, mark-cursor);
   return CkQualPosSeqaa(head_gbqp,  gbqp, preq,
             error_msgs, perform_corrections,  aa, mark);

} /* CkQualSeqaa */
Ejemplo n.º 12
0
/***************************************************************************
*  CkQualPosaa:
*  
*  -- format       (pos:base_range, aa:amino_acid)
*  -- example      /anticodon=(pos:34..36,aa:Phe)
*                  /anticodon=(pos: 34..36, aa: Phe)
*
*  Checks the overall shape of the qualifier value:
*    1. it must start with "(pos:" (case-insensitive);
*    2. a ',' must follow;
*    3. "aa:" (case-insensitive) must come after the comma and any
*       further commas or blanks.
*  A failed check posts an error when error_msgs is set, deletes the
*  qualifier from the list when perform_corrections is set, and returns
*  GB_FEAT_ERR_DROP.
*
*  Between "(pos:" and the comma only digits and '.' are expected (blanks
*  inside the range are not allowed yet).  If anything else is found there,
*  or if the closing ')' is missing, GB_FEAT_ERR_NONE is returned without
*  further checks.  Otherwise the amino acid text is saved and the result
*  of CkQualPosSeqaa is returned.
****************************************************************************/
NLM_EXTERN int CkQualPosaa(GBQualPtr PNTR head_gbqp, GBQualPtr gbqp, 
   GBQualPtr preq,
   Boolean error_msgs, Boolean perform_corrections)
{
   CharPtr  cursor, mark, aa = NULL;

   cursor = gbqp->val;

   /* 1. leading "(pos:" */
   if (StringNICmp(cursor, "(pos:", 5) != 0) {
      if (error_msgs) {
         ErrPostEx(SEV_ERROR, ERR_QUALIFIER_Pos,
            "Missing (pos: /%s=%s",gbqp->qual,gbqp->val);
/* ) match */
      }
      if (perform_corrections) {
         DeleteGBQualFromList(head_gbqp, gbqp, preq);
      }
      return GB_FEAT_ERR_DROP;
   }
   cursor += 5;
   while (*cursor == ' ') {
      cursor++;
   }

   /* 2. comma separating the range from the amino acid */
   mark = StringChr(cursor, ',');
   if (mark == NULL) {
      if (error_msgs) {
         ErrPostEx(SEV_ERROR, ERR_QUALIFIER_SeqPosComma,
            "Missing \',\' /%s=%s",gbqp->qual,gbqp->val);
/* ) match */
      }
      if (perform_corrections) {
         DeleteGBQualFromList(head_gbqp, gbqp, preq);
      }
      return GB_FEAT_ERR_DROP;
   }

   /* The range may hold only digits and dots up to the comma. */
   while (cursor != mark && (IS_DIGIT(*cursor) || *cursor == '.')) {
      cursor++;
   }
   if (cursor != mark) {
      return GB_FEAT_ERR_NONE;
   }

   while (*cursor == ',' || *cursor == ' ') {
      cursor++;
   }

   /* 3. "aa:" */
   if (StringNICmp(cursor, "aa:", 3) != 0) {
      if (error_msgs) {
         ErrPostEx(SEV_ERROR, ERR_QUALIFIER_AA,
            "Missing aa: /%s=%s",gbqp->qual,gbqp->val);
      }
      if (perform_corrections) {
         DeleteGBQualFromList(head_gbqp, gbqp, preq);
      }
      return GB_FEAT_ERR_DROP;
   }
   cursor += 3;
   while (*cursor == ' ') {
      cursor++;
   }

   mark = StringChr(cursor, ')');
   if (mark == NULL) {
      return GB_FEAT_ERR_NONE;
   }

   aa = TextSave(cursor, mark-cursor);
   return CkQualPosSeqaa(head_gbqp,  gbqp, preq,
             error_msgs, perform_corrections,  aa, mark);

} /* CkQualPosaa */
Ejemplo n.º 13
0
/*
 * ProcessAccession
 *
 * Fetches the record named by 'accn' and formats it.
 *
 * accn      either a gi number (all digits) or an accession[.version]
 * extra     passed through to DoSeqEntryToGnbk
 * only_new  when set, return without fetching if the lookups below show
 *           that the request already names the record they resolve to
 * get_var   when set, the query is issued with flags = 1
 * do_nuc    emit the record in GENBANK_FMT
 * do_prot   emit the record in GENPEPT_FMT
 *
 * For a gi with only_new set, the gi's accession.version is looked up, the
 * version is stripped, and the bare accession is resolved back to a gi; the
 * record is skipped when that gi equals the one requested.  For an
 * accession with only_new set, the record is skipped when the
 * accession.version written for its gi equals 'accn' (ignoring case).
 */
static void ProcessAccession (
  CharPtr accn,
  XtraPtr extra,
  Boolean only_new,
  Boolean get_var,
  Boolean do_nuc,
  Boolean do_prot
)

{
  Char         accver [41];
  Boolean      all_digits = TRUE;
  Int4         flags;
  Int4         gi;
  Int4         latest_gi;
  long         parsed;
  CharPtr      scan;
  SeqEntryPtr  sep;
  SeqIdPtr     sip;

  /* An input made only of digits is a gi number. */
  for (scan = accn; *scan != '\0'; scan++) {
    if (! IS_DIGIT (*scan)) {
      all_digits = FALSE;
      break;
    }
  }

  if (all_digits) {

    if (sscanf (accn, "%ld", &parsed) != 1) return;
    gi = (Int4) parsed;
    if (gi < 1) return;

    if (only_new) {
      sip = GetSeqIdForGI (gi);
      if (sip != NULL) {
        SeqIdWrite (sip, accver, PRINTID_TEXTID_ACC_VER, sizeof (accver));
        SeqIdFree (sip);
        scan = StringChr (accver, '.');
        if (scan != NULL) {
          /* drop the version and resolve the bare accession */
          *scan = '\0';
          sip = SeqIdFromAccessionDotVersion (accver);
          latest_gi = GetGIForSeqId (sip);
          SeqIdFree (sip);
          if (latest_gi == gi) return;
        }
      }
    }

  } else {

    sip = SeqIdFromAccessionDotVersion (accn);
    gi = GetGIForSeqId (sip);
    SeqIdFree (sip);

    if (only_new) {
      sip = GetSeqIdForGI (gi);
      if (sip != NULL) {
        SeqIdWrite (sip, accver, PRINTID_TEXTID_ACC_VER, sizeof (accver));
        SeqIdFree (sip);
        if (StringICmp (accn, accver) == 0) return;
      }
    }

    if (gi < 1) return;
  }

  flags = get_var ? 1 : 0;

  sep = PubSeqSynchronousQuery (gi, 0, flags);
  if (sep == NULL) return;

  if (do_nuc) {
    DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
  }
  if (do_prot) {
    DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
  }

  SeqEntryFree (sep);
}
Ejemplo n.º 14
0
/*
 * ProcessSingleRecord
 *
 * Reads one record from 'filename', then runs the timed phases selected
 * by the options in 'cfp' on it: optional far-component locking, optional
 * ASN.1 write-out, and DoProcess.  Elapsed whole seconds for each phase
 * are written to cfp->logfp when that stream is not NULL.
 *
 * filename  input file; nothing is done when it has no text
 * cfp       run options; nothing is done when NULL
 *
 * Input format is chosen by cfp->type:
 *   1, 6   read with ReadAsnFastaOrFlatFile
 *   2..5   read with SeqEntryAsnRead, BioseqAsnRead, BioseqSetAsnRead or
 *          SeqSubmitAsnRead respectively (binary when cfp->binary is set)
 *   7      read every line through a FileCache and return; no further
 *          processing is done
 * Any other value is reported and the function returns.
 *
 * When cfp->io contains 'r' the read is repeated cfp->maxcount times,
 * freeing the previously registered entity before each new read.
 */
static void ProcessSingleRecord (
  CharPtr filename,
  CSpeedFlagPtr cfp
)

{
  AsnIoPtr      aip;
  BioseqPtr     bsp;
  ValNodePtr    bsplist = NULL;
  BioseqSetPtr  bssp;
  Pointer       dataptr = NULL;
  Uint2         datatype, entityID = 0;
  FileCache     fc;
  FILE          *fp;
  Int1          iotype;
  Char          line [512];
  Int4          maxio = 1;
  SeqEntryPtr   sep;
  time_t        starttime, stoptime, worsttime;
  CharPtr       str;
  Int4          x;

  if (cfp == NULL) return;

  if (StringHasNoText (filename)) return;

  /* 'r' in the io options asks for the read to be repeated */
  if (StringChr (cfp->io, 'r') != NULL) {
    maxio = cfp->maxcount;
  }

  starttime = GetSecs ();

  for (x = 0; x < maxio; x++) {
    /* discard the result of the previous iteration before reading again */
    if (entityID != 0) {
      ObjMgrFreeByEntityID (entityID);
      entityID = 0;
      dataptr = NULL;
    }

    if (cfp->type == 1) {

      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }

      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);

      FileClose (fp);

      entityID = ObjMgrRegister (datatype, dataptr);

    } else if (cfp->type >= 2 && cfp->type <= 5) {

      aip = AsnIoOpen (filename, cfp->binary? "rb" : "r");
      if (aip == NULL) {
        Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", filename);
        return;
      }

      /* each type value selects the reader for one top-level object */
      switch (cfp->type) {
        case 2 :
          dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
          datatype = OBJ_SEQENTRY;
          break;
        case 3 :
          dataptr = (Pointer) BioseqAsnRead (aip, NULL);
          datatype = OBJ_BIOSEQ;
          break;
        case 4 :
          dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
          datatype = OBJ_BIOSEQSET;
          break;
        case 5 :
          dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
          datatype = OBJ_SEQSUB;
          break;
        default :
          break;
      }

      AsnIoClose (aip);

      entityID = ObjMgrRegister (datatype, dataptr);

    } else if (cfp->type == 6) {

      /* same read path as type 1 */
      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }

      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);

      FileClose (fp);

      entityID = ObjMgrRegister (datatype, dataptr);

    } else if (cfp->type == 7) {

      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }

      FileCacheSetup (&fc, fp);

      /* consume the whole file line by line; the lines are not used */
      str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
      while (str != NULL) {
        str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
      }

      FileClose (fp);

      /* returns after a single pass, whatever maxio is */
      return;

    } else {
      Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) cfp->type);
      return;
    }
  }

  if (entityID < 1 || dataptr == NULL) {
    Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
    return;
  }

  if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
        datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {

    /* report the time spent in the read loop above */
    stoptime = GetSecs ();
    worsttime = stoptime - starttime;
    if (cfp->logfp != NULL) {
      fprintf (cfp->logfp, "ASN reading time %ld seconds\n", (long) worsttime);
      fflush (cfp->logfp);
    }

    sep = GetTopSeqEntryForEntityID (entityID);

    if (sep == NULL) {
      /* No top SeqEntry yet: wrap a bare Bioseq or BioseqSet in a new */
      /* SeqEntry, hand it to SeqMgrSeqEntry, then look the top entry  */
      /* up again.                                                      */
      sep = SeqEntryNew ();
      if (sep != NULL) {
        if (datatype == OBJ_BIOSEQ) {
          bsp = (BioseqPtr) dataptr;
          sep->choice = 1;
          sep->data.ptrvalue = bsp;
          SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
        } else if (datatype == OBJ_BIOSEQSET) {
          bssp = (BioseqSetPtr) dataptr;
          sep->choice = 2;
          sep->data.ptrvalue = bssp;
          SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
        } else {
          sep = SeqEntryFree (sep);
        }
      }
      sep = GetTopSeqEntryForEntityID (entityID);
    }

    if (sep != NULL) {

      if (cfp->lock) {
        starttime = GetSecs ();

        bsplist = LockFarComponents (sep);

        stoptime = GetSecs ();
        worsttime = stoptime - starttime;
        if (cfp->logfp != NULL) {
          fprintf (cfp->logfp, "Far component locking time %ld seconds\n", (long) worsttime);
          fflush (cfp->logfp);
        }
      }

      /* 'w' in the io options: write the entry maxcount times to ofp, */
      /* as binary ASN.1 when 'b' is also present, otherwise as text   */
      if (StringChr (cfp->io, 'w') != NULL) {
        starttime = GetSecs ();

        iotype = ASNIO_TEXT_OUT;
        if (StringChr (cfp->io, 'b') != NULL) {
          iotype = ASNIO_BIN_OUT;
        }

        for (x = 0; x < cfp->maxcount; x++) {
          aip = AsnIoNew (iotype, cfp->ofp, NULL, NULL, NULL);
          if (aip != NULL) {
            SeqEntryAsnWrite (sep, aip, NULL);
            AsnIoFree (aip, FALSE);
          }
        }

        stoptime = GetSecs ();
        worsttime = stoptime - starttime;
        if (cfp->logfp != NULL) {
          fprintf (cfp->logfp, "ASN writing time %ld seconds\n", (long) worsttime);
          fflush (cfp->logfp);
        }
      }

      starttime = GetSecs ();

      /* the selected processing steps are repeated maxcount times */
      for (x = 0; x < cfp->maxcount; x++) {
        DoProcess (sep, entityID, cfp);
      }

      stoptime = GetSecs ();
      worsttime = stoptime - starttime;
      if (cfp->logfp != NULL) {
        fprintf (cfp->logfp, "Internal processing time %ld seconds\n", (long) worsttime);
        fflush (cfp->logfp);
      }

      ObjMgrFreeByEntityID (entityID);

      bsplist = UnlockFarComponents (bsplist);
    }

  } else {

    Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
  }
}
Ejemplo n.º 15
0
/*
 * DoProcess
 *
 * Runs, on one top-level SeqEntry, every operation selected by the option
 * strings in 'cfp'.  Each option string is searched for single-letter
 * flags; a flag that is present triggers the corresponding step.
 *
 * sep       top-level SeqEntry to work on; nothing is done when NULL
 * entityID  object manager entity ID of 'sep'
 * cfp       run options and output stream; nothing is done when NULL
 *
 * Flags, in the order they are applied:
 *   cfp->clean   t  delete title descriptors
 *                a  AssignIDsInEntity
 *                b  BasicSeqEntryCleanup
 *                s  SeriousSeqEntryCleanup
 *   cfp->index   f  SeqMgrIndexFeatures
 *   cfp->seq     c C s S r d D T x X  per-Bioseq FASTA/defline callbacks
 *                (S and D index features first if not yet indexed; T and X
 *                delete titles and re-index first)
 *                f t  per-feature FASTA callbacks
 *   cfp->feat    v g h  feature visit / gene overlap tests
 *                t  feature table output
 *                s S  suggest intervals (S with batch nucleotide set up)
 *                c  visit coding regions
 *                x o d  accepted, no action
 *   cfp->desc    b t  accepted, no action
 *   cfp->verify  v  validate, reporting through ValidCallback
 *                b  GenBank flatfile output
 *
 * Direct writes made here to cfp->ofp are skipped when that stream is
 * NULL, and the id buffer is cleared before each SeqIdWrite.
 */
static void DoProcess (
  SeqEntryPtr sep,
  Uint2 entityID,
  CSpeedFlagPtr cfp
)

{
  Char            id [64];
  ErrSev          oldErrSev;
  ValidStructPtr  vsp;

  if (sep == NULL || cfp == NULL) return;

  /* ---- cleanup options ---- */

  if (StringChr (cfp->clean, 't') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'a') != NULL) {
    AssignIDsInEntity (entityID, 0, NULL);
  }
  if (StringChr (cfp->clean, 'b') != NULL) {
    BasicSeqEntryCleanup (sep);
  }
  if (StringChr (cfp->clean, 's') != NULL) {
    SeriousSeqEntryCleanup (sep, NULL, NULL);
  }

  /* ---- indexing options ---- */

  if (StringChr (cfp->index, 'f') != NULL) {
    SeqMgrIndexFeatures (entityID, 0);
  }

  /* ---- sequence options ---- */

  if (StringChr (cfp->seq, 'c') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaExist);
  }
  if (StringChr (cfp->seq, 'C') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRegen);
  }
  if (StringChr (cfp->seq, 's') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'S') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaSeq);
  }
  if (StringChr (cfp->seq, 'r') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaRaw);
  }
  if (StringChr (cfp->seq, 'd') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'D') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'T') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoFastaDefline);
  }
  if (StringChr (cfp->seq, 'x') != NULL) {
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  if (StringChr (cfp->seq, 'X') != NULL) {
    VisitDescriptorsInSep (sep, NULL, MarkTitles);
    DeleteMarkedObjects (entityID, 0, NULL);
    SeqMgrIndexFeatures (entityID, 0);
    VisitBioseqsInSep (sep, (Pointer) cfp, DoNewFastaDefline);
  }
  
  if (StringChr (cfp->seq, 'f') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaFeat);
  }
  if (StringChr (cfp->seq, 't') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoFastaTrans);
  }

  /* ---- feature options ---- */

  if (StringChr (cfp->feat, 'v') != NULL) {
    VisitFeaturesInSep (sep, NULL, DoVisitFeaturesTest);
  }
  if (StringChr (cfp->feat, 'g') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapPrintTest);
  }
  if (StringChr (cfp->feat, 'h') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    VisitFeaturesInSep (sep, (Pointer) cfp, DoGeneOverlapSpeedTest);
  }
  /* 'x', 'o' and 'd' are recognized but currently do nothing */
  if (StringChr (cfp->feat, 'x') != NULL) {
  }
  if (StringChr (cfp->feat, 'o') != NULL) {
  }
  if (StringChr (cfp->feat, 'd') != NULL) {
  }
  if (StringChr (cfp->feat, 't') != NULL) {
    SeqEntryToGnbk (sep, NULL, FTABLE_FMT, SEQUIN_MODE, NORMAL_STYLE,
                    0, 0, SHOW_PROT_FTABLE, NULL, cfp->ofp);
  }
  if (StringChr (cfp->feat, 's') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      id [0] = '\0';
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      /* the nucleotide context is only valid for this pass */
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'S') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    cfp->nucbsp = FindNucBioseq (sep);
    if (cfp->nucbsp != NULL) {
      BioseqToGeneticCode (cfp->nucbsp, &(cfp->genCode), NULL, NULL, NULL, 0, NULL);
      SetBatchSuggestNucleotide (cfp->nucbsp, cfp->genCode);
      id [0] = '\0';
      SeqIdWrite (cfp->nucbsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      if (cfp->ofp != NULL) {
        fprintf (cfp->ofp, "%s\n", id);
      }
      VisitBioseqsInSep (sep, (Pointer) cfp, DoSuggestIntervals);
      ClearBatchSuggestNucleotide ();
      cfp->nucbsp = NULL;
      cfp->genCode = 0;
    }
  }
  if (StringChr (cfp->feat, 'c') != NULL) {
    VisitFeaturesInSep (sep, (Pointer) cfp, DoVisitCodingRegions);
  }

  /* ---- descriptor options: recognized but currently do nothing ---- */

  if (StringChr (cfp->desc, 'b') != NULL) {
  }
  if (StringChr (cfp->desc, 't') != NULL) {
  }

  /* ---- verification options ---- */

  if (StringChr (cfp->verify, 'v') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    vsp = ValidStructNew ();
    if (vsp != NULL) {
      vsp->useSeqMgrIndexes = TRUE;
      vsp->suppressContext = TRUE;
      vsp->seqSubmitParent = TRUE;
      vsp->testLatLonSubregion = TRUE;
      /* message level is lowered for the validation and restored after */
      oldErrSev = ErrSetMessageLevel (SEV_NONE);
      vsp->errfunc = ValidCallback;
      vsp->userdata = (Pointer) cfp->ofp;
      /* vsp->convertGiToAccn = FALSE; */
      ValidateSeqEntry (sep, vsp);
      ValidStructFree (vsp);
      ErrSetMessageLevel (oldErrSev);
    }
  }
  if (StringChr (cfp->verify, 'b') != NULL) {
    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
      SeqMgrIndexFeatures (entityID, 0);
    }
    SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE,
                    0, 0, 0, NULL, cfp->ofp);
  }

  if (cfp->ofp != NULL) {
    fflush (cfp->ofp);
  }
}
Ejemplo n.º 16
0
/*
 * Ali_SeqLineGetType
 *
 * Guesses whether a line of sequence data is protein or nucleotide.
 *
 * seqLine    the sequence characters of one line
 * configPtr  alignment settings; gapChar, missingChar and (optionally)
 *            unalignedChar list the characters that carry no residue
 *            information, and nuclLineMaxThreshold / nuclLineMinThreshold
 *            are percentages used for the final decision
 *
 * Returns:
 *   ALI_PROTEIN     if the line contains any of E F I L P Q Z (either
 *                   case) or '*', or if the share of common nucleotide
 *                   plus filler characters is below nuclLineMinThreshold
 *   ALI_NUCLEOTIDE  if that share is above nuclLineMaxThreshold
 *   ALI_AMBIGUOUS   if the line is NULL or empty, if more than 80% of it
 *                   is filler, or if the share falls between the two
 *                   thresholds
 *
 * Filler characters are '-', ' ' and every character of the configured
 * gap / missing / unaligned strings.  Membership is tested against those
 * strings directly, so no fixed-size scratch buffer is needed and strings
 * of any length (or NULL) are handled.
 */
Int2 Ali_SeqLineGetType(CharPtr seqLine,
			AliConfigInfoPtr configPtr)
{
  Int4    position;
  Int4    nuclCount;
  Int4    miscCount;
  FloatLo lineLen;
  FloatLo percentNucl;
  FloatLo percentMisc;
  Char    ch;

  /* Nothing to classify */

  if (seqLine == NULL || seqLine[0] == '\0')
    return ALI_AMBIGUOUS;

  /* Is it definitely a protein sequence? */
  /* The following chars are only in      */
  /* protein sequences.                   */

  for (position = 0; seqLine[position] != '\0'; position++) {
    if (StringChr ("EeFfIiLlPpQqZz*", seqLine[position]) != NULL)
      return ALI_PROTEIN;
  }

  /* All others are technically ambiguous, but */
  /* if we have a high enough percentage of    */
  /* common nucleotides, then it is probably a */
  /* nucleotide sequence.                      */

  nuclCount = 0;
  miscCount = 0;

  for (position = 0; seqLine[position] != '\0'; position++) {
    ch = seqLine[position];
    if (StringChr ("ATCGNXatcgnx", ch) != NULL)
      nuclCount++;
    else if (ch == '-' || ch == ' ' ||
             (configPtr->gapChar != NULL &&
              StringChr (configPtr->gapChar, ch) != NULL) ||
             (configPtr->missingChar != NULL &&
              StringChr (configPtr->missingChar, ch) != NULL) ||
             (configPtr->unalignedChar != NULL &&
              StringChr (configPtr->unalignedChar, ch) != NULL))
      miscCount++;
  }

  /* The scan stopped at the terminator, so position is the line length */

  lineLen = (FloatLo) position;

  /* If we have a high percentage of misc chars then */
  /* we don't have enough data to make a decision.   */

  percentMisc = ((FloatLo) miscCount) / lineLen;
  if ((percentMisc * 100) > 80)
    return ALI_AMBIGUOUS;

  /* Else, if a high percentage are common nucleotide */
  /* characters then it is a nucleotide line.         */

  percentNucl = ((FloatLo) nuclCount + (FloatLo) miscCount) / lineLen;

  if ((percentNucl * 100) > configPtr->nuclLineMaxThreshold)
    return ALI_NUCLEOTIDE;
  else if ((percentNucl * 100) < configPtr->nuclLineMinThreshold)
    return ALI_PROTEIN;

  /* If we haven't come to a conclusion */
  /* then say so.                       */

  return ALI_AMBIGUOUS;
}
Ejemplo n.º 17
0
/***************************************************************************
*  SplitMultiValQual:
*
*  Walks the qualifier list and, for every qualifier that GBQualSplit
*  accepts (any result other than -1) and whose value has the form
*  "(v1,v2,...,vn)", replaces it with n qualifiers of the same name, one
*  per value, kept in order at the same place in the list.
*
*  A parenthesized value without a comma, "(v)", simply has its
*  parentheses removed in place.  Values that are NULL, do not start with
*  '(' or do not end with ')' are left untouched.
*
*  A warning is posted for every qualifier that is split.  Neither
*  error_msgs nor perform_corrections is consulted.
*
*  Always returns GB_FEAT_ERR_NONE.
****************************************************************************/
NLM_EXTERN int SplitMultiValQual(GBQualPtr PNTR gbqp, 
     Boolean error_msgs, Boolean perform_corrections)
{
   Int2        val;
   GBQualPtr   next_q, curq, tmp;
   int         retval = GB_FEAT_ERR_NONE;
   CharPtr     bptr, ptr, buf, dst;

   for (curq = *gbqp; curq != NULL; curq = next_q) {
      next_q = curq->next;  /* the nodes added below go before this one */

      val = GBQualSplit(curq->qual);
      if (val == -1) {
         continue;
      }

      bptr = curq->val;
      if (bptr == NULL || *bptr != '(') {
         continue;
      }
      if (*(bptr+StringLen(bptr)-1) != ')') {
         continue;
      }

      /* drop the closing parenthesis */
      *(bptr+StringLen(bptr)-1) = '\0';

      if ((ptr = StringChr(bptr, ',')) == NULL) {
         /* Single value: shift the text left over the '('.  Source and   */
         /* destination overlap, so this is done character by character   */
         /* instead of with a string copy.                                */
         for (dst = bptr; (*dst = *(dst + 1)) != '\0'; dst++) {
            continue;
         }
         continue;
      }

      ErrPostEx(SEV_WARNING, ERR_QUALIFIER_MultiValue,
         "Splited qualifier %s", curq->qual); 

      /* The first value stays in the current node; the old buffer is  */
      /* kept until every piece has been copied out of it.             */
      buf = bptr;
      bptr++;
      curq->val = TextSave(bptr, ptr-bptr);
      bptr = ptr + 1;
      curq->next = NULL;

      /* one new node per remaining comma-terminated value */
      while ((ptr = StringChr(bptr, ',')) != NULL) {
         tmp = GBQualNew();
         tmp->qual = StringSave(curq->qual);
         tmp->val = TextSave(bptr, ptr-bptr);
         curq = tie_qual(curq, tmp);
         bptr = ptr + 1;
      }

      /* the last value, then reconnect the rest of the list */
      tmp = GBQualNew();
      tmp->qual = StringSave(curq->qual);
      tmp->val = StringSave(bptr);
      curq = tie_qual(curq, tmp);
      tmp->next = next_q;

      MemFree(buf);
   }

   return retval;

} /* SplitMultiValQual */  
Ejemplo n.º 18
0
/*
 * Get_Sequence
 *
 * Reads FASTA-like text from 'f' into 'seqrp' until the input ends or a
 * line starting with '&' or '!' is met.
 *
 *   '>' lines  close the segment in progress (if it has any residues) and
 *              append a new id record whose accession is the text after
 *              '>' up to the first space;
 *   others     contribute residues: letters are upper-cased; for
 *              nucleotides U becomes T and only A, C, G, N, T are kept;
 *              for proteins every letter except J, O and U is kept.
 *
 * Residues are appended to seqrp->rawSeq and the length of each segment to
 * seqrp->segLens (both created here when NULL).  When seqrp->lookForStop
 * is set a '*' is appended to every segment; for nucleotides "NNN" is
 * appended to the final segment.  On return seqrp->sequence holds the
 * merged residues and seqrp->length their count, or NULL and 0 when
 * nothing was read.
 *
 * Nothing is done when either argument is NULL.
 */
static void Get_Sequence (FILE *f, SeqRecPtr seqrp)

{
  CharPtr   cursor;
  IdRecPtr  entry;
  Char      residue;
  Int4      seg_len = 0;
  Char      str [256];
  IdRecPtr  tail;
  Int4      total = 0;

  if (f == NULL || seqrp == NULL) return;

  if (seqrp->rawSeq == NULL) {
    seqrp->rawSeq = BSNew (1000);
  }
  if (seqrp->segLens == NULL) {
    seqrp->segLens = BSNew (10);
  }

  while (ReadNextLine (f, str, sizeof (str))) {

    /* end-of-record markers */
    if (str [0] == '&' || str [0] == '!') break;

    if (str [0] == '>') {

      /* close the segment that was being collected */
      if (seg_len > 0) {
        if (seqrp->lookForStop) {
          BSPutByte (seqrp->rawSeq, (Int2) '*');
          seg_len++;
        }
        BSWrite (seqrp->segLens, &seg_len, sizeof (Int4));
        total += seg_len;
        seg_len = 0;
      }

      /* the id ends at the first space */
      cursor = StringChr (str, ' ');
      if (cursor != NULL) {
        *cursor = '\0';
      }

      /* append a record for this id at the end of the list */
      entry = MemNew (sizeof (IdRec));
      if (entry != NULL) {
        entry->id.accn = StringSave (str + 1);
      }
      if (seqrp->ids == NULL) {
        seqrp->ids = entry;
      } else {
        tail = seqrp->ids;
        while (tail->next != NULL) {
          tail = tail->next;
        }
        tail->next = entry;
      }
      continue;
    }

    /* sequence data (an empty line contributes nothing) */
    for (cursor = str; *cursor != '\0'; cursor++) {
      residue = TO_UPPER (*cursor);
      if (residue < 'A' || residue > 'Z') continue;

      if (seqrp->nuc) {
        if (residue == 'U') {
          residue = 'T';
        }
        if (StringChr ("BDEFHIJKLMOPQRSUVWXYZ", residue) != NULL) continue;
      } else {
        if (StringChr ("JOU", residue) != NULL) continue;
      }

      BSPutByte (seqrp->rawSeq, (Int2) residue);
      seg_len++;
    }
  }

  /* nucleotide data gets three trailing N's */
  if (seqrp->nuc) {
    BSPutByte (seqrp->rawSeq, (Int2) 'N');
    BSPutByte (seqrp->rawSeq, (Int2) 'N');
    BSPutByte (seqrp->rawSeq, (Int2) 'N');
    seg_len += 3;
  }

  /* close the last segment */
  if (seg_len > 0) {
    if (seqrp->lookForStop) {
      BSPutByte (seqrp->rawSeq, (Int2) '*');
      seg_len++;
    }
    BSWrite (seqrp->segLens, &seg_len, sizeof (Int4));
    total += seg_len;
    seg_len = 0;
  }

  if (total > 0) {
    seqrp->sequence = BSMerge (seqrp->rawSeq, NULL);
    seqrp->length = total;
  } else {
    seqrp->sequence = NULL;
    seqrp->length = 0;
  }
}