Esempio n. 1
0
/*
 * List the distinct member stable ids recorded for one species.
 *
 * ha - adaptor whose prepare() callback is used to build the statement.
 * sp - species name; a private copy is made and every '_' in it is replaced
 *      by ' ' before it is compared against genome_db.name.
 *
 * Returns a newly created Vector containing one freshly copied string per
 * row fetched. The Vector is empty (but still valid) when the species name
 * cannot be copied or when the query text would not fit in the local buffer.
 *
 * NOTE(review): the species name is pasted into the SQL text unescaped;
 * callers are presumably trusted - confirm before exposing to user input.
 */
Vector *HomologyAdaptor_listStableIdsFromSpecies(HomologyAdaptor *ha, char *sp)  {
  StatementHandle *sth;
  ResultRow *row;
  Vector *genes;
  char qStr[1024];
  char *species = NULL;
  int qLen;

  genes = Vector_new();

  species = StrUtil_copyString(&species,sp,0);
  if (species == NULL) {
    fprintf(stderr, "Error: Failed copying species name\n");
    return genes;
  }
  species = StrUtil_strReplChr(species,'_',' ');

  // Bounded formatting: the species name is caller supplied, so it must not
  // be allowed to run past the end of the fixed size query buffer.
  qLen = snprintf(qStr, sizeof(qStr),
          "select  distinct grm.member_stable_id "
          " from    gene_relationship_member grm,"
          "         genome_db gd "
          " where   gd.genome_db_id = grm.genome_db_id "
          " and     gd.name = '%s'", species);

  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr, "Error: Query for species '%s' does not fit in query buffer\n", species);
    free(species);
    return genes;
  }

  sth = ha->prepare((BaseAdaptor *)ha, qStr, strlen(qStr));
  sth->execute(sth);

  while ((row = sth->fetchRow(sth))) {
    char *tmpStr;
    // Each id is copied so the Vector does not point into the result row
    Vector_addElement(genes,StrUtil_copyString(&tmpStr, row->getStringAt(row,0),0));
  }
  sth->finish(sth);

  free(species);

  return genes;
}
Esempio n. 2
0
/*
 * Test driver for the cigar string helpers.
 *
 * Runs five numbered checks through ok() and returns the sum of the values
 * ok() handed back.
 */
int main(int argc, char *argv[]) {
  int tally = 0;
  Vector *parts;
  char *piece;
  char *copy;
  char *flipped;

  initEnsC(argc, argv);

  /* Checks 1-2: cigar1 splits into six pieces, the first one being "6M" */
  parts = CigarStrUtil_getPieces(cigar1);
  tally += ok(1, Vector_getNumElement(parts) == 6);

  piece = Vector_getElementAt(parts, 0);
  tally += ok(2, strcmp(piece, "6M") == 0);

  Vector_free(parts);

  /* Check 3: a copy made by StrUtil_copyString compares equal to cigar1 */
  StrUtil_copyString(&copy, cigar1, 0);
  tally += ok(3, strcmp(copy, cigar1) == 0);

  /* Checks 4-5: the reversed string still has six pieces, the first is "M" */
  flipped = CigarStrUtil_reverse(copy, strlen(copy));
  free(copy);

  parts = CigarStrUtil_getPieces(flipped);
  tally += ok(4, Vector_getNumElement(parts) == 6);

  piece = Vector_getElementAt(parts, 0);
  tally += ok(5, strcmp(piece, "M") == 0);

  return tally;
}
Esempio n. 3
0
/*
 * Build the cDNA string of a prediction transcript.
 *
 * The exon list is obtained with PredictionTranscript_getAllExons(trans,0)
 * and the exon sequence strings are concatenated in list order. Before the
 * first exon sequence is appended, the string is passed through
 * SeqUtil_addNs() together with that exon's phase.
 *
 * NULL entries in the exon list are skipped while the result is still
 * empty; the first NULL entry seen after something has been written ends
 * the walk.
 *
 * Returns the string that was built up through StrUtil_copyString /
 * SeqUtil_addNs / StrUtil_appendString.
 */
char *PredictionTranscript_getcDNA(PredictionTranscript *trans) {
    Vector *exonList = PredictionTranscript_getAllExons(trans,0);
    char *cdna;
    int padPending = 1;   /* cleared once the first real exon was handled */
    int idx;

    cdna = StrUtil_copyString(&cdna, "", 0);

    for (idx = 0; idx < Vector_getNumElement(exonList); idx++) {
        PredictionExon *exon = Vector_getElementAt(exonList, idx);
        int phase;

        if (exon == NULL) {
            /* leading gaps are ignored, a gap after sequence stops the walk */
            if (cdna[0] != '\0') {
                break;
            }
            continue;
        }

        /* NIY: the phase is read unconditionally, no "is it defined" test */
        phase = PredictionExon_getPhase(exon);

        if (padPending) {
            cdna = SeqUtil_addNs(cdna, phase);
            padPending = 0;
        }

        /*
         * A workaround that used to live here is disabled: when an exon's
         * phase differed from the previous phase it appended "NN" or "N"
         * (previous phase 1 or 2) and then called SeqUtil_addNs() again.
         */

        cdna = StrUtil_appendString(cdna, PredictionExon_getSeqString(exon));
    }

    /* NIY: should the exon vector be freed here? */
    return cdna;
}
Esempio n. 4
0
/*
 * Look up the alignment type string stored for a method_link_id.
 *
 * gaa          - adaptor whose prepare() callback is used for the query.
 * methodLinkId - id to look up; a zero id is rejected with an error message.
 *
 * Returns a freshly copied string holding the "type" column of the first
 * matching method_link row, or NULL when the id is zero or no row exists
 * (an error is printed to stderr in both cases).
 */
char *GenomicAlignAdaptor_alignmentTypeByMethodLinkId(GenomicAlignAdaptor *gaa, IDType methodLinkId) {
  StatementHandle *sth;
  ResultRow *row;
  char qStr[512];
  char *alignmentType = NULL;

  if (!methodLinkId) {
    fprintf(stderr, "Error: methodLinkId has to be defined");
    return NULL;
  }

  sprintf(qStr,"SELECT type FROM method_link WHERE method_link_id = " IDFMTSTR, methodLinkId);
  sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
  sth->execute(sth);

  if ((row = sth->fetchRow(sth))) {
    alignmentType = StrUtil_copyString(&alignmentType, row->getStringAt(row,0), 0);
  } else {
    fprintf(stderr,"Error: No alignmentType for " IDFMTSTR "\n",methodLinkId);
  }

  // The handle is finished on every path once it has been prepared, so the
  // "no row" case no longer leaves it open.
  sth->finish(sth);

// NIY switch to using passed in string
  return alignmentType;
}
Esempio n. 5
0
/*
 * Return the coordinate system's db id rendered as a string.
 *
 * The text is produced on the first call (formatted with IDFMTSTR, copied
 * into cs->dbIdStr) and the stored pointer is handed back on every call.
 */
char *CoordSystem_getDbIDStr(CoordSystem *cs) {
  char idText[1024];

  /* Already formatted on an earlier call */
  if (cs->dbIdStr != NULL) {
    return cs->dbIdStr;
  }

  sprintf(idText, IDFMTSTR, CoordSystem_getDbID(cs));
  StrUtil_copyString(&cs->dbIdStr, idText, 0);

  return cs->dbIdStr;
}
Esempio n. 6
0
/*
 * Return "<name>:<version>" for a coordinate system.
 *
 * The string is built on first use, stored in cs->nameColonVersion and its
 * length recorded in cs->lenNameColonVersion. A NULL version contributes an
 * empty string after the colon. A NULL cs yields NULL.
 */
char *CoordSystem_getNameColonVersion(CoordSystem *cs) {
  if (!cs) {
    return NULL;
  }

  if (cs->nameColonVersion == NULL) {
    char joined[1024];

    sprintf(joined, "%s:%s",
            CoordSystem_getName(cs),
            CoordSystem_getVersion(cs) ? CoordSystem_getVersion(cs) : "");

    StrUtil_copyString(&cs->nameColonVersion, joined, 0);
    cs->lenNameColonVersion = strlen(joined);
  }

  return cs->nameColonVersion;
}
Esempio n. 7
0
/*
 * Store a copy of seqstr in the sequence object and record its length.
 *
 * seq    - object whose seq and length fields are updated.
 * seqstr - string to copy.
 *
 * Returns seq->seq on success. When StrUtil_copyString() reports failure
 * (a NULL/zero return, as other callers of it treat it) an error is printed
 * and NULL is returned without touching seq->length.
 */
char *Sequence_setSeq(Sequence *seq, char *seqstr) {
  // A non-NULL return is the copied string, i.e. success; only a NULL
  // return means the copy failed.
  if (!StrUtil_copyString(&(seq->seq),seqstr,0)) {
    fprintf(stderr,"ERROR: Failed allocating space for seq\n");
    return NULL;
  }

  seq->length = strlen(seq->seq);

  return seq->seq;
}
Esempio n. 8
0
/*
 * Fetch the homologues of one gene that belong to a given target species.
 *
 * ha   - homology adaptor used for all queries.
 * sp   - species of the source gene ('_' is replaced by ' ' in a private copy).
 * gene - stable id of the source gene.
 * hSp  - species the homologues are wanted in ('_' replaced by ' ' likewise).
 *
 * The gene_relationship ids containing the gene are looked up first; for
 * each id HomologyAdaptor_fetchHomologuesBySpeciesRelationshipId() is called
 * and its results are appended to the returned Vector.
 *
 * Returns a newly created Vector. It is empty (but valid) when a species
 * name cannot be copied or the query text would not fit the local buffer.
 */
Vector *HomologyAdaptor_fetchHomologuesOfGeneInSpecies(HomologyAdaptor *ha, 
                          char *sp, char *gene, char *hSp) {
  char qStr[1024];
  Vector *genes;
  IDType *relationshipIds = NULL;  // stays NULL (safe to free) if never filled in
  int nRelationship;
  int qLen;
  int i;
  char *hSpecies = NULL;
  char *species = NULL;

  genes = Vector_new();

  species = StrUtil_copyString(&species,sp,0);
  hSpecies = StrUtil_copyString(&hSpecies,hSp,0);

  if (species == NULL || hSpecies == NULL) {
    fprintf(stderr, "Error: Failed copying species names\n");
    free(species);
    free(hSpecies);
    return genes;
  }

  species = StrUtil_strReplChr(species,'_',' ');
  hSpecies = StrUtil_strReplChr(hSpecies,'_',' ');

  // Bounded formatting: species and gene are caller supplied strings
  qLen = snprintf(qStr, sizeof(qStr),
           "select grm.gene_relationship_id "
           " from   gene_relationship_member grm, "
           "        genome_db gd "
           " where  gd.genome_db_id = grm.genome_db_id "
           " and    gd.name = '%s' "
           " and    grm.member_stable_id = '%s' "
           " group by grm.gene_relationship_id", species, gene);

  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr, "Error: Relationship query for species '%s' gene '%s' does not fit in query buffer\n",
            species, gene);
  } else {
    nRelationship = HomologyAdaptor_getRelationships(ha,qStr,&relationshipIds);

    for (i=0;i<nRelationship;i++) {
      Vector *homols = HomologyAdaptor_fetchHomologuesBySpeciesRelationshipId(ha,hSpecies,relationshipIds[i]);
      // Elements move over to genes; only the temporary container is freed
      Vector_append(genes, homols);
      Vector_free(homols);
    }

    free(relationshipIds);
  }

  free(species);
  free(hSpecies);

  return genes;
}
Esempio n. 9
0
/*
 * Fetch every homologue of a gene, whatever species it belongs to.
 *
 * ha   - homology adaptor used for all queries.
 * sp   - species of the gene ('_' is replaced by ' ' in a private copy).
 * gene - stable id of the gene.
 *
 * The gene_relationship ids containing the gene are looked up first. For
 * each id the other members of that relationship (every member whose stable
 * id differs from gene) are fetched with HomologyAdaptor_getHomologues() and
 * appended to the returned Vector.
 *
 * Returns a newly created Vector. It is empty (but valid) when the species
 * name cannot be copied or the first query would not fit the local buffer;
 * if a per-relationship query would not fit, the walk stops and whatever
 * was collected so far is returned.
 */
Vector *HomologyAdaptor_fetchHomologuesOfGene(HomologyAdaptor *ha, char *sp, char *gene) {
  char qStr[1024];
  char *species = NULL;
  int nRelationship;
  IDType *relationshipIds = NULL;  // stays NULL (safe to free) if never filled in
  Vector *genes;
  int qLen;
  int i;

  genes = Vector_new();

  species = StrUtil_copyString(&species,sp,0);
  if (species == NULL) {
    fprintf(stderr, "Error: Failed copying species name\n");
    return genes;
  }
  species = StrUtil_strReplChr(species,'_',' ');

  // Bounded formatting: species and gene are caller supplied strings
  qLen = snprintf(qStr, sizeof(qStr),
            "select grm.gene_relationship_id "
            " from   gene_relationship_member grm, "
            "        genome_db gd "
            " where  gd.genome_db_id = grm.genome_db_id "
            " and    gd.name = '%s' "
            " and    grm.member_stable_id = '%s' "
            " group by grm.gene_relationship_id", species, gene);

  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr, "Error: Relationship query for species '%s' gene '%s' does not fit in query buffer\n",
            species, gene);
    free(species);
    return genes;
  }

  nRelationship = HomologyAdaptor_getRelationships(ha, qStr,&relationshipIds);

  for (i=0;i<nRelationship;i++) {
    Vector *homols;

    qLen = snprintf(qStr, sizeof(qStr),
               "select   grm.member_stable_id,"
               "         gd.name,"
               "         grm.chromosome,"
               "         grm.chrom_start,"
               "         grm.chrom_end"
               " from    gene_relationship_member grm,"
               "         genome_db gd"
               " where   grm.gene_relationship_id = " IDFMTSTR 
               " and     grm.genome_db_id = gd.genome_db_id "
               " and NOT (grm.member_stable_id = '%s')", relationshipIds[i], gene);

    if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
      fprintf(stderr, "Error: Homologue query for gene '%s' does not fit in query buffer\n", gene);
      break;
    }

    homols = HomologyAdaptor_getHomologues(ha, qStr);
    // Elements move over to genes; only the temporary container is freed
    Vector_append(genes,homols);
    Vector_free(homols);
  }

  free(relationshipIds);
  free(species);

  return genes;
}
Esempio n. 10
0
/*
 * Copy From (starting at offset StartPos) and register the copy in the
 * string table, storing the resulting handle in *To.
 *
 * *To is reset to NULL first. Returns 1 on success; returns 0 after calling
 * Error_trace() when either the copy or EcoString_getPointer() fails.
 *
 * NOTE(review): the temporary copy is not freed here - presumably
 * EcoString_getPointer() takes care of it; confirm.
 */
int EcoString_copyStr(ECOSTRTABLE *EcoSTabP, ECOSTRING *To, char *From,int StartPos) {
  char *Scratch;

  *To = NULL;

  /* Second step only runs when the first one succeeded */
  if (!StrUtil_copyString(&Scratch,From,StartPos) ||
      !EcoString_getPointer(EcoSTabP,To,Scratch)) {
    Error_trace("EcoString_copyStr",NULL);
    return 0;
  }

  return 1;
}
Esempio n. 11
0
/*
 * Store a list of GenomicAlign objects with a single multi-row INSERT into
 * genomic_align_block.
 *
 * gaa           - adaptor whose prepare() callback is used for the statement.
 * genomicAligns - Vector of GenomicAlign objects to store.
 *
 * Nothing is written when
 *   - the scratch buffer cannot be allocated,
 *   - any alignment has a consensus or query DNAFrag without a db id,
 *   - any alignment type has no method_link id,
 *   - a formatted row does not fit the scratch buffer, or
 *   - the Vector is empty.
 * An error message is printed to stderr for each of the failure cases.
 */
void GenomicAlignAdaptor_store(GenomicAlignAdaptor *gaa, Vector *genomicAligns) {
  int ok = 1;
  int nRows = 0;
  char *qStr = NULL;
  StatementHandle *sth;
  char commaStr[2] = {'\0','\0'};
  int i;
  char *tmpStr = NULL;
  const size_t tmpStrSize = 65556;
  
  if ((tmpStr = (char *)calloc(tmpStrSize,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating tmpStr\n");
    ok = 0;
  }

  if (ok) {
    // First pass: check that everything has dbIDs before building any SQL
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag  = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);

      if (!DNAFrag_getDbID(consDNAFrag) || !DNAFrag_getDbID(queryDNAFrag)) {
        fprintf(stderr, "Error: dna_fragment in GenomicAlign is not in DB\n");
        ok = 0;
        break;
      }
    }
  }

  if (ok) {
    StrUtil_copyString(&qStr, "INSERT INTO genomic_align_block"
                       " (consensus_dnafrag_id, consensus_start, consensus_end,"
                       "  query_dnafrag_id, query_start, query_end, query_strand, method_link_id,"
                       "  score, perc_id, cigar_line) VALUES ",0);
    if (qStr == NULL) {
      fprintf(stderr, "Error: Failed allocating INSERT statement\n");
      ok = 0;
    }
  }

  if (ok) {
    // Second pass: all clear for storing - one VALUES tuple per alignment,
    // appended inside the loop so that every element ends up in the INSERT.
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag  = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);
      IDType methodLinkId;
      int rowLen;

      methodLinkId = GenomicAlignAdaptor_methodLinkIdByAlignmentType(gaa, GenomicAlign_getAlignmentType(ga));

      if (!methodLinkId) {
        fprintf(stderr, "Error: There is no method_link with this type [%s] in the DB.\n",
                GenomicAlign_getAlignmentType(ga));
        ok = 0;
        break;
      }

      // commaStr is empty for the first tuple and "," for every later one
      rowLen = snprintf(tmpStr, tmpStrSize,
              " %s(" IDFMTSTR ", %d, %d, " IDFMTSTR ", %d, %d, %d, " IDFMTSTR ", %f, %f, '%s')", 
              commaStr, 
              DNAFrag_getDbID(consDNAFrag),
              GenomicAlign_getConsensusStart(ga),
              GenomicAlign_getConsensusEnd(ga),
              DNAFrag_getDbID(queryDNAFrag),  
              GenomicAlign_getQueryStart(ga),
              GenomicAlign_getQueryEnd(ga),
              GenomicAlign_getQueryStrand(ga),
              methodLinkId,
              GenomicAlign_getScore(ga),
              GenomicAlign_getPercentId(ga),
              GenomicAlign_getCigarString(ga));

      if (rowLen < 0 || (size_t)rowLen >= tmpStrSize) {
        fprintf(stderr, "Error: GenomicAlign row does not fit in statement buffer\n");
        ok = 0;
        break;
      }

      qStr = StrUtil_appendString(qStr, tmpStr);
      commaStr[0] = ','; 
      nRows++;
    }
  }

  // Only run the statement when it is complete and has at least one tuple;
  // an INSERT with an empty or partial VALUES list is not valid SQL.
  if (ok && nRows > 0) {
    sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
    sth->execute(sth);
    sth->finish(sth);
  }

  free(qStr);
  free(tmpStr);
}
Esempio n. 12
0
/*
 * Copy name into gdb->name via StrUtil_copyString() and return the value
 * of gdb->name afterwards.
 */
char *GenomeDB_setName(GenomeDB *gdb, char *name) {
  char **slot = &(gdb->name);

  StrUtil_copyString(slot, name, 0);

  return *slot;
}
Esempio n. 13
0
/*
 * Copy assembly into gdb->assembly via StrUtil_copyString() and return the
 * value of gdb->assembly afterwards.
 */
char *GenomeDB_setAssembly(GenomeDB *gdb, char *assembly) {
  char **slot = &(gdb->assembly);

  StrUtil_copyString(slot, assembly, 0);

  return *slot;
}
Esempio n. 14
0
/*
 * Manually set the sequence of a contig: seq is copied into contig->seq via
 * StrUtil_copyString() and the value of contig->seq is returned.
 */
char *RawContig_setSeq(RawContig *contig, char *seq) {
  char **slot = &(contig->seq);

  StrUtil_copyString(slot, seq, 0);

  return *slot;
}
Esempio n. 15
0
/*
 * Copy to into m->to and return whatever StrUtil_copyString() returned.
 */
char *Mapper_setTo(Mapper *m, char *to) {
  char *copied;

  copied = StrUtil_copyString(&(m->to), to, 0);

  return copied;
}
Esempio n. 16
0
/*
 * Copy from into m->from and return whatever StrUtil_copyString() returned
 * (not m->from itself).
 */
char *Mapper_setFrom(Mapper *m, char *from) {
  char *copied;

  copied = StrUtil_copyString(&(m->from), from, 0);

  return copied;
}
Esempio n. 17
0
/*
 * Derive new alignments from two existing ones and append them to
 * mergedAligns.
 *
 * gaa          - adaptor; passed to GenomicAlignAdaptor_nextCig() and set as
 *                the adaptor of every GenomicAlign created here.
 * mergedAligns - Vector that receives the newly created GenomicAlign objects.
 * alignA       - supplies the consensus DNAFrag of each result.
 * alignB       - supplies the query DNAFrag of each result.
 *
 * The cigar strings of both alignments are split into pieces and stepped
 * through with GenomicAlignAdaptor_nextCig() (presumably one match region
 * per call - its implementation is not visible here). Wherever the two
 * current regions overlap, a result cigar string is extended with match,
 * deletion ("D") or insertion ("I") entries. When both sides jump at once,
 * the result collected so far is emitted as a new GenomicAlign and a fresh
 * one is started. A final GenomicAlign is emitted for a trailing match.
 *
 * For every emitted GenomicAlign:
 *   query strand = product of the two query strands,
 *   score        = the smaller of the two scores,
 *   percent id   = percentId(A) * percentId(B) / 100, truncated by an int cast.
 *
 * Nothing is returned; all results are in mergedAligns.
 */
void GenomicAlignAdaptor_addDerivedAlignments(GenomicAlignAdaptor *gaa, 
                     Vector *mergedAligns, GenomicAlign *alignA, GenomicAlign *alignB) {

  // variable name explanation
  // q - query c - consensus s - start e - end l - last
  // o, ov overlap j - jump_in_
  // r - result

  int  qs, qe, lqs, lqe, cs, ce, lce,
       ocs, oce, oqs, oqe, jc, jq, ovs, ove,
       rcs, rce, rqs, rqe;
  int currentMatch = 0;
  int newMatch;
  int cigAPos = 0, cigBPos = 0;
  char *resultCig;
  char tmpStr[128];

  // initialization phase
  Vector *cigA = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignA));
  Vector *cigB = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignB));

  // cigB is walked backwards when alignA's query is on the reverse strand
  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    Vector_reverse(cigB);
  }

  // need a 'normalized' start for qs, qe, oxs so I dont 
  // have to check strandedness all the time  

  // consensus is strand 1 and is not compared to anything,
  // can keep its original coordinate system
 
  lce = GenomicAlign_getConsensusStart(alignA) - 1;
  ce = lce;
  cs = ce + 1;
  
  // alignBs query can be + or - just keep relative coords for now
  lqe = 0; lqs = 1;
  qe = 0; qs = 1;

  // ocs will be found relative to oce and has to be comparable
  // to oqs. But it could be that we have to move downwards if we
  // are not - strand. thats why coordinates are transformed here

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    // query_end is first basepair of alignment
    if (GenomicAlign_getQueryEnd(alignA) < GenomicAlign_getConsensusEnd(alignB)) {
      oce = 0; ocs = 1;
      oqe = GenomicAlign_getConsensusEnd(alignB) - GenomicAlign_getQueryEnd(alignA);
      oqs = oqe + 1;
    } else {
      oqe = 0; oqs = 1;
      oce = GenomicAlign_getQueryEnd(alignA) - GenomicAlign_getConsensusEnd(alignB);
      ocs = oce + 1;
    }
  } else {
    // in theory no coordinate magic necessary :-)
    oqs = GenomicAlign_getQueryStart(alignA);
    oqe = oqs - 1; 
    ocs = GenomicAlign_getConsensusStart(alignB);
    oce = ocs - 1;
  }

  // initializing result
  // (a zero rcs/rqs below means "start of the current result not set yet")
  rcs = rce = rqs = rqe = 0;
  resultCig= StrUtil_copyString(&resultCig,"",0);

  while (1) {
    // NOTE(review): newGa is assigned below but never read in this function
    int newGa;
    // exit if you request a new piece of alignment and the cig list is 
    // empty

    if (oce < ocs || oce < oqs) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
      continue;
    }
    if (oqe < oqs || oqe < ocs) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
      continue;
    }

    // now matching region overlap in reference genome
    // (ovs = larger of the two starts, ove = smaller of the two ends)
    ovs = ocs < oqs ? oqs : ocs;
    ove = oce < oqe ? oce : oqe;
    
    // jumps relative to the last emitted consensus/query end; only
    // meaningful once a match is already in progress
    if (currentMatch) {
      jc = cs + (ovs - oqs) - lce - 1;
      jq = qs + (ovs - ocs) - lqe - 1;
    } else {
      jc = jq = 0;
    }

    newMatch = ove - ovs + 1;
    newGa = 0;

    if (jc==0) {
      if (jq==0) {
	currentMatch += newMatch;
      } else {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// jq deletions;
	if (jq == 1) {
          resultCig = StrUtil_appendString(resultCig,"D");
        } else {
          sprintf(tmpStr,"%dD",jq);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
	currentMatch = newMatch;
      }
    } else {
      if (jq==0) {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// jc insertions;
	if (jc==1) {
          resultCig = StrUtil_appendString(resultCig,"I");
        } else {
          sprintf(tmpStr,"%dI",jc);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
	currentMatch = newMatch;
         
      } else {
        // both sides jumped: close the current result and start a new one
        double percId;
        double score;
        GenomicAlign *ga;

        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// new GA
	int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
	int queryStart, queryEnd;
	if (queryStrand == 1) {
	  queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
	  queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
	} else {
	  queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
	  queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
	}
      
        score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ? 
          GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
        // the (int) cast drops the fractional part before storing in a double
        percId =  (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);
        
        ga = GenomicAlign_new();
    
        GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
        GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
        GenomicAlign_setCigarString(ga, resultCig);
        GenomicAlign_setConsensusStart(ga, rcs);
        GenomicAlign_setConsensusEnd(ga, rce);
        GenomicAlign_setQueryStrand(ga, queryStrand);
        GenomicAlign_setQueryStart(ga, queryStart);
        GenomicAlign_setQueryEnd(ga, queryEnd);
        GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
        GenomicAlign_setPercentId(ga, percId);
        GenomicAlign_setScore(ga, score);

	Vector_addElement(mergedAligns, ga);

        rcs = rce = rqs = rqe = 0;
        // NOTE(review): resultCig is reused after being handed to
        // GenomicAlign_setCigarString(); assumes the setter stored its own
        // copy - TODO confirm
	resultCig[0] = '\0';
	
	currentMatch = newMatch;
      }
    }


    
    // start coordinates are only set once per result, ends on every pass
    if (!rcs) rcs = cs+(ovs-oqs);
    rce = cs+(ove-oqs);
    if (!rqs) rqs = qs+(ovs-ocs);
    rqe = qs+(ove-ocs);

    // update the last positions
    lce = rce; 
    lqe = rqe;

    // next piece on the one that end earlier
    // (both are advanced when they end at the same position)
 
    if (oce <= oqe) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
    }
    if (oce >= oqe) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
    } 
  } // end of while loop

  // if there is a last floating current match
  if (currentMatch) {
    
    // new GA
    int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
    int queryStart, queryEnd;
    double percId;
    double score;
    GenomicAlign *ga;

    sprintf(tmpStr,"%dM",currentMatch);
    resultCig = StrUtil_appendString(resultCig, tmpStr);

    if (queryStrand == 1) {
      queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
      queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
    } else {
      queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
      queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
    }
  
    score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ? 
      GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
    // the (int) cast drops the fractional part before storing in a double
    percId =  (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);
    
    ga = GenomicAlign_new();

    GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
    GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
    GenomicAlign_setCigarString(ga, resultCig);
    GenomicAlign_setConsensusStart(ga, rcs);
    GenomicAlign_setConsensusEnd(ga, rce);
    GenomicAlign_setQueryStrand(ga, queryStrand);
    GenomicAlign_setQueryStart(ga, queryStart);
    GenomicAlign_setQueryEnd(ga, queryEnd);
    GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
    GenomicAlign_setPercentId(ga, percId);
    GenomicAlign_setScore(ga, score);

    Vector_addElement(mergedAligns, ga);
  }

  // NOTE(review): freed after being passed to GenomicAlign_setCigarString();
  // safe only if the setter copies the string - TODO confirm
  free(resultCig);

  Vector_free(cigA);
  Vector_free(cigB);

  // nothing to return all in merged_aligns
}
Esempio n. 18
0
/*
 * Entry point of the BAM read coverage program.
 *
 * Parses the command line, connects to the database, fetches the slice for
 * the requested chromosome, opens the BAM file together with its index and
 * runs calcCoverage() for the slice.
 *
 * Options (each "with value" option consumes the following argument):
 *   -U/--ucsc_naming      sets M_UCSC_NAMING in flags (no value)
 *   -i/--in_file          input BAM file (required)
 *   -o/--out_file         output file name (required)
 *   -h/--host, -u/--user, -p/--password, -P/--port, -n/--name
 *                         database connection settings
 *   -a/--assembly         assembly name
 *   -t/--threads          thread count passed to hts_set_threads()
 *   -v/--verbosity        verbosity level
 *   -c/--chromosome       chromosome (region) name to work on
 *
 * Returns 0 on success and 1 when the BAM file or its index cannot be
 * opened; exits with status 1 when no slice could be fetched.
 *
 * NOTE(review): the code after each Bamcov_usage() call assumes that
 * function does not return - confirm.
 */
int main(int argc, char *argv[]) {
  DBAdaptor *      dba;
  Vector *         slices;

  int   argNum = 1;

  char *inFName  = NULL;
  char *outFName = NULL;

  char *dbUser = "******";
  char *dbPass = NULL;
  int   dbPort = 3306;

  char *dbHost = "ens-staging.internal.sanger.ac.uk";
  char *dbName = "homo_sapiens_core_71_37";

  char *assName = "GRCh37";

  char *chrName = "1";


  int flags = 0;
  int   threads  = 1;

  initEnsC(argc, argv);

  while (argNum < argc) {
    char *arg = argv[argNum];
    char *val;

// Ones without a val go here
    if (!strcmp(arg, "-U") || !strcmp(arg,"--ucsc_naming")) {
      flags |= M_UCSC_NAMING;
    } else {
// Ones with a val go in this block
      if (argNum == argc-1) {
        Bamcov_usage();
      }

      val = argv[++argNum];
  
      if (!strcmp(arg, "-i") || !strcmp(arg,"--in_file")) {
        StrUtil_copyString(&inFName,val,0);
      } else if (!strcmp(arg, "-o") || !strcmp(arg,"--out_file")) {
        StrUtil_copyString(&outFName,val,0);
      } else if (!strcmp(arg, "-h") || !strcmp(arg,"--host")) {
        StrUtil_copyString(&dbHost,val,0);
      } else if (!strcmp(arg, "-p") || !strcmp(arg,"--password")) {
        StrUtil_copyString(&dbPass,val,0);
      } else if (!strcmp(arg, "-P") || !strcmp(arg,"--port")) {
        dbPort = atoi(val);
      } else if (!strcmp(arg, "-n") || !strcmp(arg,"--name")) {
        StrUtil_copyString(&dbName,val,0);
      } else if (!strcmp(arg, "-u") || !strcmp(arg,"--user")) {
        StrUtil_copyString(&dbUser,val,0);
      } else if (!strcmp(arg, "-t") || !strcmp(arg,"--threads")) {
        threads = atoi(val);
      } else if (!strcmp(arg, "-a") || !strcmp(arg,"--assembly")) {
        StrUtil_copyString(&assName,val,0);
      } else if (!strcmp(arg, "-v") || !strcmp(arg,"--verbosity")) {
        verbosity = atoi(val);
// Temporary
      } else if (!strcmp(arg, "-c") || !strcmp(arg,"--chromosome")) {
        StrUtil_copyString(&chrName,val,0);
      } else {
        fprintf(stderr,"Error in command line at %s\n\n",arg);
        Bamcov_usage();
      }
    }
    argNum++;
  }

  if (verbosity > 0) {
    printf("Program for calculating read coverage in a BAM file \n"
           "Steve M.J. Searle.  [email protected]  Last update April 2013.\n");
  }

  if (!inFName || !outFName) {
    Bamcov_usage();
  }

  dba = DBAdaptor_new(dbHost,dbUser,dbPass,dbName,dbPort,NULL);

  slices = Vector_new();

  SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba);

  Slice *slice = SliceAdaptor_fetchByRegion(sa,NULL,chrName,POS_UNDEF,POS_UNDEF,1,NULL, 0);

  // Only a real slice is queued, so the emptiness check below can actually
  // trigger when the region could not be fetched.
  if (slice != NULL) {
    Vector_addElement(slices,slice);
  }

  if (Vector_getNumElement(slices) == 0) {
    fprintf(stderr, "Error: No slices.\n");
    exit(1);
  }

  htsFile *in = hts_open(inFName, "rb");
  if (in == 0) {
    fprintf(stderr, "Fail to open BAM file %s\n", inFName);
    return 1;
  }

  hts_set_threads(in, threads);
  hts_idx_t *idx;
  idx = bam_index_load(inFName); // load BAM index
  if (idx == 0) {
    fprintf(stderr, "BAM index file is not available.\n");
    // do not leave the BAM handle open on the error path
    hts_close(in);
    return 1;
  }

  int i;
  for (i=0; i<Vector_getNumElement(slices); i++) {
    Slice *curSlice = Vector_getElementAt(slices,i);

    if (verbosity > 0) printf("Working on '%s'\n",Slice_getName(curSlice));

    if (verbosity > 0) printf("Stage 1 - calculating coverage\n");
    calcCoverage(inFName, curSlice, in, idx, flags);
  }


  hts_idx_destroy(idx);
  hts_close(in);

  if (verbosity > 0) printf("Done\n");
  return 0;
}
Esempio n. 19
0
/*
 * Fetch the genomic_align_block rows that involve one DNAFrag, looking at
 * it both as consensus and as query sequence.
 *
 * gaa          - adaptor whose prepare() callback is used for the queries.
 * dnaFrag      - fragment to search for; must not be NULL.
 * targetGenome - optional; when given, the other side of each alignment is
 *                restricted to fragments of this genome, and each of the two
 *                queries is only run if GenomeDB_hasQuery() /
 *                GenomeDB_hasConsensus() allows it.
 * startP, endP - optional range; applied only when both are non-NULL.
 * methodLinkId - method_link_id every returned row must have.
 *
 * Both queries start from the same SELECT ... WHERE method_link_id prefix;
 * each one adds its own conditions to a private copy of that prefix.
 *
 * Returns a newly created Vector with the objects built by
 * GenomicAlignAdaptor_objectsFromStatementHandle(). The Vector is empty
 * (but valid) when dnaFrag is NULL or nothing matches.
 */
Vector *GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect( GenomicAlignAdaptor *gaa, 
     DNAFrag *dnaFrag, GenomeDB *targetGenome, int *startP, int *endP, IDType methodLinkId) {
  IDType dnaFragId;
  GenomeDB *genomeDB;
  char *qStr = NULL;   // shared prefix of both queries
  char *sql = NULL;    // full text of the query currently being built
  char tmpStr[512];
  Vector *results;
  StatementHandle *sth;

  // Created up front so that every return path hands back a valid Vector
  results = Vector_new();

  if (!dnaFrag) {
    fprintf(stderr, "Error: Input dnafrag must not be NULL\n");
    return results;
  }

  // formatting the dnafrag
  dnaFragId = DNAFrag_getDbID(dnaFrag);

  genomeDB = DNAFrag_getGenomeDB(dnaFrag);

  StrUtil_copyString(&qStr,
                     "SELECT gab.consensus_dnafrag_id,"
                     "       gab.consensus_start," 
                     "       gab.consensus_end,"
                     "       gab.query_dnafrag_id," 
                     "       gab.query_start," 
                     "       gab.query_end,"
                     "       gab.query_strand,"
                     "       gab.method_link_id,"
                     "       gab.score,"
                     "       gab.perc_id," 
                     "       gab.cigar_line"
                     " FROM genomic_align_block gab ",0);

  if (targetGenome) {
    qStr = StrUtil_appendString(qStr,", dnafrag d");
  }
  sprintf(tmpStr," WHERE gab.method_link_id = " IDFMTSTR, methodLinkId);
  qStr = StrUtil_appendString(qStr,tmpStr);

  // First query: dnaFrag on the consensus side
  if (!targetGenome ||
      GenomeDB_hasQuery(genomeDB, targetGenome, methodLinkId)) {
    Vector *qres;

    StrUtil_copyString(&sql, qStr, 0);

    sprintf(tmpStr," AND gab.consensus_dnafrag_id = " IDFMTSTR, dnaFragId);
    sql = StrUtil_appendString(sql, tmpStr);

    if (startP && endP) {
      // rows starting more than maxAlignmentLength before *startP are excluded
      int lowerBound = *startP - gaa->maxAlignmentLength;
      sprintf(tmpStr,
              " AND gab.consensus_start <= %d"
              " AND gab.consensus_start >= %d"
              " AND gab.consensus_end >= %d", *endP, lowerBound, *startP ) ;
      sql = StrUtil_appendString(sql, tmpStr);
    }

    if (targetGenome) {
      sprintf(tmpStr,
              " AND gab.query_dnafrag_id = d.dnafrag_id"
              " AND d.genome_db_id = " IDFMTSTR, GenomeDB_getDbID(targetGenome));
      sql = StrUtil_appendString(sql, tmpStr);
    }

    sth = gaa->prepare((BaseAdaptor *)gaa, sql, strlen(sql));
    sth->execute(sth);

    qres = GenomicAlignAdaptor_objectsFromStatementHandle(gaa, sth, 0);
    Vector_append(results,qres);
    Vector_free(qres);

    sth->finish(sth);

    free(sql);
    sql = NULL;
  }

  // Second query: dnaFrag on the query side. It starts again from the
  // shared prefix so that it does not inherit the consensus conditions.
  if (!targetGenome ||
      GenomeDB_hasConsensus(genomeDB, targetGenome, methodLinkId)) {
    Vector *cres;

    StrUtil_copyString(&sql, qStr, 0);

    sprintf(tmpStr," AND gab.query_dnafrag_id = " IDFMTSTR, dnaFragId);
    sql = StrUtil_appendString(sql, tmpStr);

    if (startP && endP) {
      int lowerBound = *startP - gaa->maxAlignmentLength;
      sprintf(tmpStr,
              " AND gab.query_start <= %d"
              " AND gab.query_start >= %d"
              " AND gab.query_end >= %d", *endP, lowerBound, *startP ) ;
      sql = StrUtil_appendString(sql, tmpStr);
    }
    if (targetGenome) {
      sprintf(tmpStr,
              " AND gab.consensus_dnafrag_id = d.dnafrag_id"
              " AND d.genome_db_id = " IDFMTSTR, GenomeDB_getDbID(targetGenome));
      sql = StrUtil_appendString(sql, tmpStr);
    }
    sth = gaa->prepare((BaseAdaptor *)gaa, sql, strlen(sql));
    sth->execute(sth);

    cres = GenomicAlignAdaptor_objectsFromStatementHandle(gaa, sth, 1);
    Vector_append(results,cres);
    Vector_free(cres);

    sth->finish(sth);

    free(sql);
    sql = NULL;
  }

  free(qStr);

  return results;
}