Exemplo n.º 1
0
void mix_genericerror(void)
{
  /* The status line is considered "unfinished" when it is empty, contains
   * "..." or mentions "generating" (the latter matched via strifind).  Only
   * in that case is it replaced by a failure notice; otherwise mix_status()
   * is called with NULL. */
  int unfinished = streq(statusline, "")
    || strfind(statusline, "...")
    || strifind(statusline, "generating");

  if (!unfinished) {
    mix_status(NULL);
    return;
  }
  mix_status("Failed!");
}
Exemplo n.º 2
0
int chain_select(int hop[], char *chainstr, int maxrem, REMAILER *remailer,
		 int type, BUFFER *feedback)
{
/* Translate a textual remailer chain into an array of remailer indices.
 *
 * hop[]    is returned containing the chain as integers (0 means random
 *          like *), in reverse order: the final hop is stored in hop[0].
 *          The array passed in must have room for 20 entries.
 * chainstr is the input desired chain such as *,*,*,*; elements may be
 *          separated by ',', ';' or ':'.  If NULL or empty, CHAIN is used.
 * maxrem   is the number of entries of remailer[] that are searched.
 * remailer is an input list of remailer details (see mix2_rlist())
 * type     selects which capability flag a named remailer must have:
 *          0 = flags.mix, 1 = flags.cpunk, 2 = flags.newnym.
 * feedback if not NULL, receives a diagnostic line on failure.
 *
 * Returns the number of hops stored, or -1 if a chain element does not
 * match any remailer.  Parsing stops (with a "Chain too long." note) once
 * 20 hops have been stored.
 */
  int len = 0;
  int i, j, k;
  BUFFER *chain, *selected, *addr;
  chain = buf_new();
  selected = buf_new();
  addr = buf_new();

  if (chainstr == NULL || chainstr[0] == '\0')
    buf_sets(chain, CHAIN);
  else
    buf_sets(chain, chainstr);

  /* put the chain backwards: final hop is in hop[0] */

  /* Scan from the end of the string; every position that follows a
   * separator (or the start of the string) begins one chain element. */
  for (i = chain->length; i >= 0; i--)
    if (i == 0 || chain->data[i - 1] == ','
	|| chain->data[i - 1] == ';' || chain->data[i - 1] == ':') {
      /* ignore whitespace; the cast keeps isspace() defined for bytes
       * with the high bit set when plain char is signed */
      for (j = i; isspace((unsigned char) chain->data[j]);)
	j++;
      if (chain->data[j] == '\0')
	break;

      if (chain->data[j] == '*')
	k = 0;
#if 0
      else if (isdigit(chain->data[j]))
	k = atoi(chain->data + j);
#endif /* 0 */
      else {
	/* Prefer the address extracted by rfc822_addr(); fall back to the
	 * raw element text if that yields nothing. */
	buf_sets(selected, chain->data + j);
	rfc822_addr(selected, addr);
	buf_clear(selected);
	buf_getline(addr, selected);
	if (!selected->length)
	  buf_sets(selected, chain->data + j);

	/* Match by exact name, by address (case-insensitive), or by a
	 * "@domain" fragment of the address. */
	for (k = 0; k < maxrem; k++)
	  if (((remailer[k].flags.mix && type == 0) ||
	       (remailer[k].flags.cpunk && type == 1) ||
	       (remailer[k].flags.newnym && type == 2)) &&
	      (streq(remailer[k].name, selected->data) ||
	       strieq(remailer[k].addr, selected->data) ||
	       (selected->data[0] == '@' && strifind(remailer[k].addr,
					    selected->data))))
	    break;
      }
      if (k < 0 || k >= maxrem) {
	if (feedback != NULL) {
		buf_appendf(feedback, "No such remailer: %b", selected);
		buf_nl(feedback);
	}
#if 0
	k = 0;
#else /* end of 0 */
	len = -1;
	goto end;
#endif /* else not 0 */
      }
      hop[len++] = k;
      if (len >= 20) {          /* array passed in has length 20 */
	if (feedback != NULL) {
		buf_appends(feedback, "Chain too long.\n");
	}
	break;
      }
      /* Cut the string at the separator so that the preceding element,
       * handled in a later iteration, ends here. */
      if (i > 0)
	chain->data[i - 1] = '\0';
    }
end:
  buf_free(chain);
  buf_free(selected);
  buf_free(addr);
  return len;
}
Exemplo n.º 3
0
// Reads all records from the loader's input through a GffReader and places the
// accepted ones into the per-genomic-sequence entries of `seqdata`.
//
//  seqdata             receives one GenomicSeqData per genomic sequence id; entries
//                      are created on demand
//  gf_validate         optional filter; a record is skipped when it returns false
//  doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan
//                      passed through unchanged to placeGf()
//  forceExons          when set, every record's exon_ftype_id is set to gff_fid_exon
//
// Records skipped before placement: "locus" features that carry a "transcripts"
// attribute, and (when this->noPseudo is set) records recognised as pseudogenes
// either by feature type or by a pseudo-style attribute with a true-like value.
// On return the input file is closed (unless it is stdin) and the reader is deleted.
void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate, 
                          bool doCluster, bool doCollapseRedundant, 
						  bool matchAllIntrons, bool fuzzSpan, bool forceExons) {
	GffReader* gffr=new GffReader(f, this->transcriptsOnly, false); //not only mRNA features, not sorted
	gffr->showWarnings(this->showWarnings);
	//           keepAttrs   mergeCloseExons  noExonAttr
	gffr->readAll(this->fullAttributes,    this->mergeCloseExons,  this->noExonAttrs);
	GVec<int> pseudoAttrIds;
	GVec<int> pseudoFeatureIds;
	if (this->noPseudo) {
		//feature types whose name starts with "pseudo"
		GffNameList& fnames = gffr->names->feats;
		for (int i=0;i<fnames.Count();i++) {
			char* n=fnames[i]->name;
			if (startsWith(n, "pseudo")) {
				pseudoFeatureIds.Add(fnames[i]->idx);
			}
		}
		//attribute names where strifind() locates "pseudo" at the very start,
		//or right after a leading "is" (any letter case)
		GffNameList& attrnames = gffr->names->attrs;
		for (int i=0;i<attrnames.Count();i++) {
			char* n=attrnames[i]->name;
			char* p=strifind(n, "pseudo");
			//cast before tolower(): passing a negative plain char is undefined
			if (p==n || (p==n+2 && tolower(static_cast<unsigned char>(n[0]))=='i'
					&& tolower(static_cast<unsigned char>(n[1]))=='s')) {
				pseudoAttrIds.Add(attrnames[i]->idx);
			}
		}
	}

	if (verbose) GMessage("   .. loaded %d genomic features from %s\n", gffr->gflst.Count(), fname.chars());
	//add to GenomicSeqData, adding to existing loci and identifying intron-chain duplicates
	for (int k=0;k<gffr->gflst.Count();k++) {
		GffObj* m=gffr->gflst[k];
		if (strcmp(m->getFeatureName(), "locus")==0 &&
				m->getAttr("transcripts")!=NULL) {
			continue; //discard locus meta-features
		}
		if (this->noPseudo) {
			bool is_pseudo=false;
			for (int i=0;i<pseudoFeatureIds.Count();++i) {
				if (pseudoFeatureIds[i]==m->ftype_id) {
					is_pseudo=true;
					break;
				}
			}
			if (is_pseudo) continue;
			//a pseudo-style attribute only counts when its value starts with
			//'t', 'y' (either case) or '1'
			for (int i=0;i<pseudoAttrIds.Count();++i) {
				char* attrv=NULL;
				if (m->attrs!=NULL) attrv=m->attrs->getAttr(pseudoAttrIds[i]);
				if (attrv!=NULL) {
					int fc=tolower(static_cast<unsigned char>(attrv[0]));
					if (fc=='t' || fc=='y' || fc=='1') {
						is_pseudo=true;
						break;
					}
				}
			}
			if (is_pseudo) continue;
			//NOTE: scanning every attribute value for the "pseudogene" keyword
			//(NCBI does that for the "product" attr) is deliberately not done here
		} //pseudogene detection requested
		//drop "locus" attributes whose value starts with "RLOC_"
		char* rloc=m->getAttr("locus");
		if (rloc!=NULL && startsWith(rloc, "RLOC_")) {
			m->removeAttr("locus", rloc);
		}
		if (forceExons) {
			m->exon_ftype_id=gff_fid_exon;
		}
		if (gf_validate!=NULL && !(*gf_validate)(m, NULL)) {
			continue;
		}
		m->isUsed(true); //so the gffreader won't destroy it
		//look up (or create) the container for this record's genomic sequence;
		//the probe object is named so it no longer shadows the member `f`
		//(the input FILE*) that is closed at the end of this function
		int seqIdx=-1;
		GenomicSeqData seqKey(m->gseq_id);
		GenomicSeqData* gdata=NULL;
		if (seqdata.Found(&seqKey,seqIdx)) gdata=seqdata[seqIdx];
		else { //entry not created yet for this genomic seq
			gdata=new GenomicSeqData(m->gseq_id);
			seqdata.Add(gdata);
		}
		bool keep=placeGf(m, gdata, doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan);
		if (!keep) {
			//hand the record back to the reader (see isUsed(true) above)
			m->isUsed(false);
		}
	} //for each read gffObj
	if (f!=stdin) { fclose(f); f=NULL; }
	delete gffr;
}
Exemplo n.º 4
0
// Applies the configured filters to one transcript record and writes the
// requested FASTA outputs for it.
//
//  gfasta   genomic sequence source, handed to fastaSeqGet()
//  gffrec   the transcript; may be modified (attributes renamed, feature name
//           set for Ensembl conversion, CDS stop adjusted, CDphase/strand
//           changed while retrying the CDS check)
//
// Returns true if the transcript passed the filters, false if it was rejected
// by the maximum intron length, the splice-site check or the CDS validity
// checks. Outputs go to f_y (CDS translation), f_x (CDS nucleotides) and
// f_w (spliced exons) when those file handles are set.
//
// NOTE(review): `defline` is shared by the three outputs, so location/attribute
// annotations appended for one output are also present in the later ones.
bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 char* gname=gffrec.getGeneName();
 if (gname==NULL) gname=gffrec.getGeneID();
 GStr defline(gffrec.getID());
 if (f_out && !fmtGTF) {
     //rename attribute "transcript_name" to "Name"
     const char* tname=NULL;
     if ((tname=gffrec.getAttr("transcript_name"))!=NULL) {
        gffrec.addAttr("Name", tname);
        gffrec.removeAttr("transcript_name");
        }
     }
 if (ensembl_convert && startsWith(gffrec.getID(), "ENS")) {
      const char* biotype=gffrec.getAttr("gene_biotype");
      const bool hadBiotype=(biotype!=NULL);
      if (hadBiotype) {
         gffrec.addAttr("type", biotype);
         }
       else { //old Ensembl files lacking gene_biotype
         gffrec.addAttr("type", gffrec.getTrackName());
         //classify by the same value that was just stored as "type", instead
         //of handing a NULL biotype to strcmp() below
         biotype=gffrec.getTrackName();
         if (biotype==NULL) biotype="";
         }

      bool is_pseudo=false;
      if (strcmp(biotype, "protein_coding")==0 || gffrec.hasCDS())
                gffrec.setFeatureName("mRNA");
       else {
          if (strcmp(biotype, "processed_transcript")==0) 
              gffrec.setFeatureName("proc_RNA");
            else {
              is_pseudo=strifind(biotype, "pseudo");
              if (is_pseudo) {
                   gffrec.setFeatureName("pseudo_RNA");
                   }
                else if (endsWith(biotype, "RNA")) {
                   gffrec.setFeatureName(biotype);
                   } else gffrec.setFeatureName("misc_RNA");
              }
          }
      //the attribute is removed only now: `biotype` points at its value and
      //removeAttr() presumably releases it -- TODO confirm
      if (hadBiotype) gffrec.removeAttr("gene_biotype");
      }
 if (gname && strcmp(gname, gffrec.getID())!=0) {
   //count the transcripts seen for each gene name
   int* isonum=isoCounter.Find(gname);
   if  (isonum==NULL) {
       isonum=new int(1);
       isoCounter.Add(gname,isonum);
       }
      else (*isonum)++;
   defline.appendfmt(" gene=%s", gname);
   }
  int seqlen=0;

  const char* tlabel=tracklabel;
  if (tlabel==NULL) tlabel=gffrec.getTrackName();
  //defline.appendfmt(" track:%s",tlabel);
  char* cdsnt = NULL;
  char* cdsaa = NULL;
  int aalen=0;
  //intron length filter
  for (int i=1;i<gffrec.exons.Count();i++) {
     int ilen=gffrec.exons[i]->start-gffrec.exons[i-1]->end-1;
     if (ilen>4000000) 
            GMessage("Warning: very large intron (%d) for transcript %s\n",
                           ilen, gffrec.getID());
     if (ilen>maxintron) {
         return false;
         }
     }
  GList<GSeg> seglst(false,true);
  GFaSeqGet* faseq=fastaSeqGet(gfasta, gffrec);
  if (spliceCheck && gffrec.exons.Count()>1) {
    //check introns for splice site consensi ( GT-AG, GC-AG or AT-AC )
    if (faseq==NULL) GError("Error: no genomic sequence available!\n");
    int glen=gffrec.end-gffrec.start+1;
    const char* gseq=faseq->subseq(gffrec.start, glen);
    bool revcompl=(gffrec.strand=='-');
    bool ssValid=true;
    for (int e=1;e<gffrec.exons.Count();e++) {
      //gseq starts at gffrec.start, so this is the first intron base
      const char* intron=gseq+gffrec.exons[e-1]->end+1-gffrec.start;
      int intronlen=gffrec.exons[e]->start-gffrec.exons[e-1]->end-1;
      GSpliceSite acceptorSite(intron,intronlen,true, revcompl);
      GSpliceSite    donorSite(intron,intronlen, false, revcompl);
      if (acceptorSite=="AG") { // GT-AG or GC-AG
         if (!donorSite.canonicalDonor()) {
            ssValid=false;break;
            }
         }
      else if (acceptorSite=="AC") { // AT-AC
         if (donorSite!="AT") { ssValid=false; break; }
         }
      else { ssValid=false; break; }
      }
    //GFREE(gseq);
    if (!ssValid) {
      if (verbose)
         GMessage("Invalid splice sites found for '%s'\n",gffrec.getID());
      return false; //don't print this one!
      }
    }

  bool trprint=true;
  int stopCodonAdjust=0;
  int mCDphase=0;
  bool hasStop=false;
  if (gffrec.CDphase=='1' || gffrec.CDphase=='2')
      mCDphase = gffrec.CDphase-'0';
  if (f_y!=NULL || f_x!=NULL || validCDSonly) {
    if (faseq==NULL) GError("Error: no genomic sequence provided!\n");
    
    if (validCDSonly) { //make sure the stop codon is always included 
      stopCodonAdjust=adjust_stopcodon(gffrec,3);
      }
    int strandNum=0;
    int phaseNum=0;
  CDS_CHECK:
    cdsnt=gffrec.getSpliced(faseq, true, &seqlen, NULL, NULL, &seglst);
    if (cdsnt==NULL) trprint=false;
    else { //has CDS
      if (validCDSonly) {
         cdsaa=translateDNA(cdsnt, aalen, seqlen);
         char* p=strchr(cdsaa,'.');
         hasStop=false;
         if (p!=NULL) {
              if (p-cdsaa>=aalen-2) { //stop found as the last codon
                      *p='\0';//remove it (terminate the translation here)
                      hasStop=true;
                      if (aalen-2==p-cdsaa) {
                        //previous to last codon is the stop codon
                        //so correct the CDS stop accordingly
                        adjust_stopcodon(gffrec,-3, &seglst);
                        stopCodonAdjust=0; //clear artificial stop adjustment
                        seqlen-=3;
                        cdsnt[seqlen]=0;
                        }
                      aalen=p-cdsaa;
                      }
                   else {//stop found before the last codon
                      trprint=false;
                      }
              }//stop codon found
         if (trprint==false) { //failed CDS validity check
           //in-frame stop codon found
           if (altPhases && phaseNum<3) {
              phaseNum++;
              gffrec.CDphase = '0'+((mCDphase+phaseNum)%3);
              //start the retry from a clean state: release both buffers,
              //drop the segments of the failed attempt and clear the failure
              //flag, otherwise the retry could never be accepted
              GFREE(cdsaa);
              GFREE(cdsnt);
              seglst.Clear();
              trprint=true;
              goto CDS_CHECK;
              }
           if (gffrec.exons.Count()==1 && bothStrands) {
              strandNum++;
              phaseNum=0;
              if (strandNum<2) {
                 GFREE(cdsaa);
                 GFREE(cdsnt);
                 seglst.Clear();
                 trprint=true;
                 gffrec.strand = (gffrec.strand=='-') ? '+':'-';
                 goto CDS_CHECK; //repeat the CDS check for the other strand
                 }
              }
           if (verbose) GMessage("In-frame STOP found for '%s'\n",gffrec.getID());
           } //has in-frame STOP
         if (fullCDSonly) {
             //a full CDS needs both a terminal stop and a leading 'M'
             if (!hasStop || cdsaa[0]!='M') trprint=false;
             }
         } // CDS check requested
      } //has CDS
    } //translation or codon check/output was requested
  if (!trprint) {
    GFREE(cdsnt);
    GFREE(cdsaa);
    return false;
    }
  if (stopCodonAdjust>0 && !hasStop) {
          //restore stop codon location
          adjust_stopcodon(gffrec, -stopCodonAdjust, &seglst);
          if (cdsnt!=NULL && seqlen>0) {
             seqlen-=stopCodonAdjust;
             cdsnt[seqlen]=0;
             }
          if (cdsaa!=NULL) aalen--;
          }

  if (f_y!=NULL) { //CDS translation fasta output requested
         if (cdsaa==NULL) { //translate now if not done before
           cdsaa=translateDNA(cdsnt, aalen, seqlen);
           }
         if (fullattr && gffrec.attrs!=NULL) {
             //append all attributes found for each transcript
              for (int i=0;i<gffrec.attrs->Count();i++) {
                defline.append(" ");
                defline.append(gffrec.getAttrName(i));
                defline.append("=");
                defline.append(gffrec.getAttrValue(i));
                }
              }
         printFasta(f_y, defline, cdsaa, aalen);
         }
   if (f_x!=NULL) { //CDS only
         if (writeExonSegs) {
              defline.append(" loc:");
              defline.append(gffrec.getGSeqName());
              defline.appendfmt("(%c)",gffrec.strand);
              //warning: not CDS coordinates are written here, but the exon ones
              defline+=(int)gffrec.start;
              defline+=(char)'-';
              defline+=(int)gffrec.end;
              // -- here these are CDS substring coordinates on the spliced sequence:
              defline.append(" segs:");
              for (int i=0;i<seglst.Count();i++) {
                  if (i>0) defline.append(",");
                  defline+=(int)seglst[i]->start;
                  defline.append("-");
                  defline+=(int)seglst[i]->end;
                  }
              }
         if (fullattr && gffrec.attrs!=NULL) {
             //append all attributes found for each transcript
              for (int i=0;i<gffrec.attrs->Count();i++) {
                defline.append(" ");
                defline.append(gffrec.getAttrName(i));
                defline.append("=");
                defline.append(gffrec.getAttrValue(i));
                }
              }
         printFasta(f_x, defline, cdsnt, seqlen);
         }
 GFREE(cdsnt);
 GFREE(cdsaa);
 if (f_w!=NULL) { //write spliced exons
    uint cds_start=0;
    uint cds_end=0;
    seglst.Clear();
    char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst);
    if (exont!=NULL) {
      if (gffrec.CDstart>0) {
        defline.appendfmt(" CDS=%d-%d", cds_start, cds_end);
        }
      if (writeExonSegs) {
        defline.append(" loc:");
        defline.append(gffrec.getGSeqName());
        defline+=(char)'|';
        defline+=(int)gffrec.start;
        defline+=(char)'-';
        defline+=(int)gffrec.end;
        defline+=(char)'|';
        defline+=(char)gffrec.strand;
        defline.append(" exons:");
        for (int i=0;i<gffrec.exons.Count();i++) {
                if (i>0) defline.append(",");
                defline+=(int)gffrec.exons[i]->start;
                defline.append("-");
                defline+=(int)gffrec.exons[i]->end;
                }
        defline.append(" segs:");
        for (int i=0;i<seglst.Count();i++) {
            if (i>0) defline.append(",");
            defline+=(int)seglst[i]->start;
            defline.append("-");
            defline+=(int)seglst[i]->end;
            }
        }
      if (fullattr && gffrec.attrs!=NULL) {
       //append all attributes found for each transcript
        for (int i=0;i<gffrec.attrs->Count();i++) {
          defline.append(" ");
          defline.append(gffrec.getAttrName(i));
          defline.append("=");
          defline.append(gffrec.getAttrValue(i));
          }
        }
      printFasta(f_w, defline, exont, seqlen);
      GFREE(exont);
      }
    } //writing f_w (spliced exons)
 return true;
}
Exemplo n.º 5
0
int t1_rlist(REMAILER remailer[], int badchains[MAXREM][MAXREM])
{
/* Read the type-I remailer list file (TYPE1LIST) into remailer[].
 *
 * First pass: every line that starts with "$remailer" and contains a
 * {"name"} part and an <address> part creates or updates the entry with
 * that name; the capability flags are taken from the text following '>'.
 * This pass ends at the first line starting with a row of dashes.
 * Second pass: lines of 72-73 characters beginning with a known remailer
 * name followed by a space supply history, latency and reliability, read
 * from fixed columns.
 *
 * Entries are stored starting at index 1.  Returns one more than the
 * highest index in use, or -1 if the list file cannot be opened.
 * badchains is filled in by parse_badchains().
 */
  FILE *list, *excl;
  int i, listed = 0;
  int n = 0;
  int namelen, addrlen;
  char line[2 * LINELEN], l2[LINELEN], name[LINELEN], *flags;
  char *lt, *gt, *lbrace, *rbrace;
  BUFFER *starex;

  starex = buf_new();
  excl = mix_openfile(STAREX, "r");
  if (excl != NULL) {
    buf_read(starex, excl);
    fclose(excl);
  }

  list = mix_openfile(TYPE1LIST, "r");
  if (list == NULL) {
    buf_free(starex);
    return (-1);
  }

  while (fgets(line, sizeof(line), list) != NULL && n < MAXREM) {
    /* '<' must precede '>' so that the address length below is positive;
     * the '{' + 4 < '}' test guarantees a name of at least two characters */
    if (strleft(line, "$remailer") &&
	strchr(line, '<') && strchr(line, '>') &&
	strchr(line, '<') < strchr(line, '>') &&
	strchr(line, '{') && strchr(line, '{') + 4 < strchr(line, '}')) {
      if (line[strlen(line) - 1] == '\n')
	line[strlen(line) - 1] = '\0';
      if (line[strlen(line) - 1] == '\r')
	line[strlen(line) - 1] = '\0';
      while (line[strlen(line) - 1] == ' ')
	line[strlen(line) - 1] = '\0';
      /* an entry not terminated by ';' continues on the next line */
      if (line[strlen(line) - 1] != ';'
	  && fgets(l2, sizeof(l2), list) != NULL)
	strcatn(line, l2, LINELEN);

      lt = strchr(line, '<');
      gt = strchr(line, '>');
      lbrace = strchr(line, '{');
      rbrace = strchr(line, '}');
      flags = gt;

      /* The name sits between {" and "}.  Only its first 20 characters are
       * kept, so never copy more than that: line[] is twice the size of
       * name[] and an unclamped copy could overflow it. */
      namelen = (int) (rbrace - lbrace) - 3;
      if (namelen > 20)
	namelen = 20;
      strncpy(name, lbrace + 2, namelen);
      name[namelen] = '\0';

      for (i = 1; i <= n; i++)
	if (streq(name, remailer[i].name))
	  break;
      if (i > n) {
	/* not in mix list */
	/* Index MAXREM would be out of range for badchains[MAXREM][MAXREM]
	 * (and presumably for remailer[] as well), so stop once the table
	 * is full. */
	if (i >= MAXREM)
	  break;
	n++;
	strcpy(remailer[i].name, name);
	/* NOTE(review): the capacity of remailer[].addr is not visible here;
	 * the address length is bounded only by the length of line[]. */
	addrlen = (int) (gt - lt) - 1;
	strncpy(remailer[i].addr, lt + 1, addrlen);
	remailer[i].addr[addrlen] = '\0';
	remailer[i].flags.mix = 0;
	remailer[i].flags.post = strifind(flags, " post");
      }
      remailer[i].flags.cpunk = strfind(flags, " cpunk");
      remailer[i].flags.pgp = strfind(flags, " pgp");
      remailer[i].flags.pgponly = strfind(flags, " pgponly");
      remailer[i].flags.latent = strfind(flags, " latent");
      remailer[i].flags.middle = strfind(flags, " middle");
      remailer[i].flags.ek = strfind(flags, " ek");
      remailer[i].flags.esub = strfind(flags, " esub");
      remailer[i].flags.hsub = strfind(flags, " hsub");
      remailer[i].flags.newnym = strfind(flags, " newnym");
      remailer[i].flags.nym = strfind(flags, " nym");
      remailer[i].info[1].reliability = 0;
      remailer[i].info[1].latency = 0;
      remailer[i].info[1].history[0] = '\0';
      remailer[i].flags.star_ex = bufifind(starex, name);
    }
    if (strleft(line,
		"-----------------------------------------------------------------------"))
      break;
  }
  n++;				/* n is now one past the last used index */
  while (fgets(line, sizeof(line), list) != NULL) {
    if (strlen(line) >= 72 && strlen(line) <= 73)
      for (i = 1; i < n; i++)
	if (strleft(line, remailer[i].name) &&
	    line[strlen(remailer[i].name)] == ' ') {
	  /* fixed-column statistics line: history at columns 42-53, latency
	   * digits at 55,56,58,59,61,62 and reliability digits at
	   * 64,65,66,68,69 (N() presumably maps a digit character to its
	   * value -- confirm against its definition) */
	  strncpy(remailer[i].info[1].history, line + 42, 12);
	  remailer[i].info[1].history[12] = '\0';
	  remailer[i].info[1].reliability = 10000 * N(line[64])
	    + 1000 * N(line[65]) + 100 * N(line[66])
	    + 10 * N(line[68]) + N(line[69]);
	  remailer[i].info[1].latency = 36000 * N(line[55])
	    + 3600 * N(line[56]) + 600 * N(line[58])
	    + 60 * N(line[59]) + 10 * N(line[61])
	    + N(line[62]);
	  listed++;
	}
  }
  fclose(list);
  parse_badchains(badchains, TYPE1LIST, "Broken type-I remailer chains", remailer, n);
  if (listed < 4)		/* we have no valid reliability info */
    for (i = 1; i < n; i++)
      remailer[i].info[1].reliability = 10000;

#ifdef USE_PGP
  pgp_rlist(remailer, n);
#endif /* USE_PGP */
  buf_free(starex);
  return (n);
}