示例#1
0
文件: genret.c 项目: ktnyt/GEMBASSY
/*
** genret: query the G-language REST service for each input sequence and
** write the records whose header matches one of the requested gene
** patterns to the output file.
**
** ACD inputs read below: sequence, access, gene, argument, accid, outfile.
**
**  - "access" selects the REST method. A fixed set of method names produces
**    sequence output (FASTA-like), "annotate"/"output" produce a pass-through
**    of the server response, and anything else produces "header,value" lines
**    preceded by a "gene,<access>" header line.
**  - "gene" is a list of patterns separated by space/comma/tab/CR/LF, or a
**    list file when prefixed with "@" or "list::".
**  - When accid is false the entry is formatted by gFormatGenbank and posted
**    to the server; otherwise the accession (or name) of the sequence is
**    used as the REST identifier.
*/
int main(int argc, char *argv[])
{
  embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3");

  AjPSeqall seqall;
  AjPSeq seq      = NULL;
  AjPStr inseq    = NULL;
  AjPStr gene     = NULL;
  AjPStr access   = NULL;
  AjBool accid    = ajTrue;
  AjPStr argument = NULL;
  AjPFile outfile = NULL;

  AjPStr seqid  = NULL;
  AjPStr restid = NULL;

  AjBool valid = ajFalse;
  AjBool isseq = ajFalse;
  AjBool isgbk = ajFalse;

  AjPFilebuff buff = NULL;
  AjPFile  tmpfile = NULL;
  AjPStr   tmpname = NULL;

  AjPStr regexstr      = NULL;
  AjPStr genecopy      = NULL;
  AjPStrTok token      = NULL;
  AjPRegexp headregex  = NULL;
  AjPRegexp generegex  = NULL;

  AjPStr url  = NULL;
  AjPStr base = NULL;
  AjPStr head = NULL;
  AjPStr line = NULL;

  seqall   = ajAcdGetSeqall("sequence");
  access   = ajAcdGetString("access");
  gene     = ajAcdGetString("gene");
  argument = ajAcdGetString("argument");
  accid    = ajAcdGetBoolean("accid");
  outfile  = ajAcdGetOutfile("outfile");

  /* Classify the requested method to pick the output layout. */
  if(
     ajStrMatchC(access, "translation") ||
     ajStrMatchC(access, "get_exon") ||
     ajStrMatchC(access, "get_exons") ||
     ajStrMatchC(access, "get_cdsseq") ||
     ajStrMatchC(access, "get_gbkseq") ||
     ajStrMatchC(access, "get_geneseq") ||
     ajStrMatchC(access, "get_intron") ||
     ajStrMatchC(access, "getseq") ||
     ajStrMatchC(access, "seq") ||
     ajStrMatchC(access, "around_startcodon") ||
     ajStrMatchC(access, "around_stopcodon") ||
     ajStrMatchC(access, "before_startcodon") ||
     ajStrMatchC(access, "before_stopcodon") ||
     ajStrMatchC(access, "after_startcodon") ||
     ajStrMatchC(access, "after_stopcodon")
     )
    {
      isseq = ajTrue;
    }
  else if(ajStrMatchC(access, "annotate") ||
          ajStrMatchC(access, "output"))
    {
      isgbk = ajTrue;
    }
  else
    {
      /* Tabular output: emit the column header once. */
      ajFmtPrintF(outfile, "gene,%S\n", access);
    }

  base = ajStrNewC("rest.g-language.org");

  /* Method arguments become additional URL path components. */
  ajStrExchangeCC(&argument, " ", "/");
  ajStrExchangeCC(&argument, ",", "/");
  ajStrExchangeCC(&argument, "\t", "/");
  ajStrExchangeCC(&argument, "\r", "/");
  ajStrExchangeCC(&argument, "\n", "/");

  /* A bare "*" is turned into the regular expression ".*". */
  if(ajStrMatchC(gene, "*"))
    {
      ajStrInsertK(&gene, 0, '.');
    }

  /* "@file" or "list::file": read the gene patterns from a list file. */
  if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::"))
    {
      ajStrExchangeCC(&gene, "@", "");
      ajStrExchangeCC(&gene, "list::", "");
      ajStrAssignS(&tmpname, gene);

      tmpfile = ajFileNewInNameS(tmpname);

      if(!tmpfile)
        {
          ajDie("List file (%S) open error\n", tmpname);
        }

      /* Release the file-name string before starting the pattern list. */
      ajStrDel(&gene);
      gene = ajStrNew();

      while(ajReadline(tmpfile, &line))
        {
          ajStrAppendS(&gene, line);
        }

      ajFileClose(&tmpfile);
      ajStrDel(&tmpname);
      ajStrDel(&line);
    }

  tmpname = ajStrNew();
  gAssignUniqueName(&tmpname);

  /* The header-line detector is loop invariant: compile it once. */
  headregex = ajRegCompC("^>");

  while(ajSeqallNext(seqall, &seq))
    {
      inseq = ajStrNew();

      if(!accid)
        {
          if(gFormatGenbank(seq, &inseq))
            {
              /* Upload the formatted entry through a temporary file. */
              tmpfile = ajFileNewOutNameS(tmpname);

              if(!tmpfile)
                {
                  ajDie("Output file (%S) open error\n", tmpname);
                }

              ajFmtPrintF(tmpfile, "%S", inseq);

              ajFileClose(&tmpfile);

              ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);

              gFilePostSS(url, tmpname, &restid);

              ajStrDel(&url);

              ajSysFileUnlinkS(tmpname);
            }
          else
            {
              ajWarn("Sequence does not have features\n"
                     "Proceeding with sequence accession ID\n");
              accid = ajTrue;
            }
        }

      /* Prefer the accession, fall back to the sequence name. */
      ajStrAssignS(&seqid, ajSeqGetAccS(seq));

      if(ajStrGetLen(seqid) == 0)
        {
          ajStrAssignS(&seqid, ajSeqGetNameS(seq));
        }

      if(ajStrGetLen(seqid) == 0)
        {
          ajWarn("No valid header information\n");
        }

      if(accid)
        {
          ajStrAssignS(&restid, seqid);
          if(ajStrGetLen(seqid) == 0)
            {
              ajDie("Cannot proceed without header with -accid\n");
            }

          if(!gValID(seqid))
            {
              ajDie("Invalid accession ID:%S, exiting\n", seqid);
            }
        }

      if(isgbk)
        {
          ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access);
        }
      else
        {
          ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument);
        }

      if(!gFilebuffURLS(url, &buff))
        {
          ajDie("GET error from %S\n", url);
        }

      while(ajBuffreadLine(buff, &line))
        {
          if(isgbk)
            {
              /* Pass the server response through unchanged. */
              ajFmtPrintF(outfile, "%S", line);
              continue;
            }

          ajStrRemoveLastNewline(&line);

          if(ajRegExec(headregex, line))
            {
              /* Header line: remember it and test it against every pattern. */
              ajStrAssignS(&head, line);
              ajStrTrimStartC(&head, ">");

              valid = ajFalse;

              /*
              ** Tokenise a copy of the pattern list, because matched
              ** alphanumeric patterns are removed from "gene" below.
              */
              genecopy = ajStrNewS(gene);
              token    = ajStrTokenNewC(genecopy, " ,\t\r\n");

              while(ajStrTokenNextParse(token, &regexstr))
                {
                  if(ajStrGetLen(regexstr))
                    {
                      generegex = ajRegComp(regexstr);

                      if(ajRegExec(generegex, line))
                        {
                          valid = ajTrue;
                          if(ajStrIsAlnum(regexstr))
                            {
                              ajStrExchangeSC(&gene, regexstr, "");
                            }
                        }

                      ajRegFree(&generegex);
                    }
                }

              ajStrTokenDel(&token);
              ajStrDel(&genecopy);
            }
          else
            {
              /* Data line: print it only after a matching header. */
              if(valid)
                {
                  if(isseq)
                    {
                      ajStrFmtWrap(&line, 60);
                      ajFmtPrintF(outfile, ">%S\n%S\n", head, line);
                    }
                  else
                    {
                      ajFmtPrintF(outfile, "%S,%S\n", head, line);
                    }

                  valid = ajFalse;
                }
            }
        }

      /* Per-sequence resources; the output file stays open for the next one. */
      ajFilebuffDel(&buff);
      ajStrDel(&url);
      ajStrDel(&restid);
      ajStrDel(&seqid);
      ajStrDel(&inseq);
    }

  /* Close the output only after every sequence has been processed. */
  ajFileClose(&outfile);

  ajRegFree(&headregex);

  ajSeqallDel(&seqall);
  ajSeqDel(&seq);
  ajStrDel(&access);
  ajStrDel(&gene);
  ajStrDel(&argument);
  ajStrDel(&base);
  ajStrDel(&tmpname);
  ajStrDel(&head);
  ajStrDel(&line);
  ajStrDel(&regexstr);

  embExit();

  return 0;
}
/*
** cacheensembl: load the Ensembl registry from the server named by the
** "servername" ACD option and write one EMBOSS "DBNAME" definition per
** Ensembl Database Adaptor to "cachefile", followed by "ALIAS" lines for
** core databases. Each database name is also listed in "outfile" when
** that file is available.
*/
int main(int argc, char** argv)
{
    AjPFile outf = NULL;
    AjPFile cachef = NULL;

    AjIList iterator = NULL;
    AjPList aliases  = NULL;
    AjPList dbas     = NULL;
    AjPList species  = NULL;

    AjPStr alias   = NULL;
    AjPStr dbname  = NULL;
    AjPStr spname  = NULL;
    AjPStr svrname = NULL;
    AjPStr svrurl  = NULL;
    AjPStr dbcurl  = NULL;

    AjPTime svrtime = NULL;

    EnsEDatabaseadaptorGroup dbag = ensEDatabaseadaptorGroupNULL;

    EnsPDatabaseadaptor dba = NULL;

    EnsPDatabaseconnection dbc = NULL;

    embInit("cacheensembl", argc, argv);
    ensInit();

    svrname = ajAcdGetString("servername");
    outf    = ajAcdGetOutfile("outfile");
    cachef = ajAcdGetOutfile("cachefile");

    dbcurl = ajStrNew();
    svrurl = ajStrNew();
    dbname = ajStrNew();

    /*
    ** svrurl is pre-allocated above and therefore never NULL, so failure
    ** has to be detected from the result of the lookup itself.
    */

    if(!ajNamSvrGetUrl(svrname, &svrurl))
        ajFatal("Could not resolve server name '%S'.", svrname);

    /* The connection is only needed to populate the registry. */

    dbc = ensDatabaseconnectionNewUrl(svrurl);
    ensRegistryLoadDatabaseconnection(dbc);
    ensDatabaseconnectionDel(&dbc);

    /* Write the server file header. */

    svrtime = ajTimeNewTodayFmt("cachefile");
    ajFmtPrintF(cachef, "# %S %D\n", ajFileGetNameS(cachef), svrtime);
    ajTimeDel(&svrtime);

    ajFmtPrintF(cachef,
                "# Automatically generated by cacheensembl "
                "for server '%S'.\n\n",
                svrname);

    /*
    ** Get all Ensembl Database Adaptor objects and write them as
    ** EMBOSS Database definitions.
    */

    aliases = ajListstrNew();
    dbas    = ajListNew();
    species = ajListstrNew();

    ensRegistryRetrieveAllSpecies(species);

    while(ajListstrPop(species, &spname))
    {
        ensRegistryGetAllDatabaseadaptors(ensEDatabaseadaptorGroupNULL,
                                          spname,
                                          dbas);

        while(ajListPop(dbas, (void**) &dba))
        {
            dbag = ensDatabaseadaptorGetGroup(dba);

            if(dbag == ensEDatabaseadaptorGroupNULL)
            {
                ajDebug("cacheensembl main got unexpected "
                        "Ensembl Database Adaptor Group %d.\n",
                        dbag);

                continue;
            }

            /*
            ** The database name is the species name; non-core groups get
            ** the group name appended as a suffix.
            */

            ajStrAssignS(&dbname, ensDatabaseadaptorGetSpecies(dba));

            if(dbag != ensEDatabaseadaptorGroupCore)
            {
                ajStrAppendC(&dbname, "_");
                ajStrAppendC(&dbname, ensDatabaseadaptorGroupToChar(dbag));
            }

            /* This connection is borrowed from the adaptor, not deleted here. */

            dbc = ensDatabaseadaptorGetDatabaseconnection(dba);

            ensDatabaseconnectionFetchUrl(dbc, &dbcurl);

            if(outf)
                ajFmtPrintF(outf, "%S\n", dbname);

            ajFmtPrintF(cachef, "DBNAME %S [\n", dbname);
            ajFmtPrintF(cachef, "  release: \"%s\"\n", ensSoftwareGetVersion());
            ajFmtPrintF(cachef, "  server:  \"%S\"\n", svrname);
            ajFmtPrintF(cachef, "  url:     \"%S\"\n", dbcurl);
            ajFmtPrintF(cachef, "]\n");
            ajFmtPrintF(cachef, "\n");

            /* Aliases are written for core databases only. */

            if(dbag != ensEDatabaseadaptorGroupCore)
                continue;

            ensRegistryAliasFetchAllbySpecies(
                ensDatabaseadaptorGetSpecies(dba),
                aliases);

            /*
            ** Format all aliases to lower case,
            ** sort them alphabetically and remove duplicates.
            */

            iterator = ajListIterNew(aliases);
            while(!ajListIterDone(iterator))
            {
                alias = ajListstrIterGet(iterator);
                ajStrFmtLower(&alias);
            }
            ajListIterDel(&iterator);

            ajListSortUnique(aliases,
                             cacheensembl_stringcompare,
                             cacheensembl_stringdelete);

            alias = NULL;
            if(ajListGetLength(aliases) > 0)
            {
                while(ajListstrPop(aliases, &alias))
                {
                    /*
                    ** Reject any aliases with other than alpha-numeric
                    ** characters like white space.
                    */

                    if(ajStrIsAlnum(alias))
                        ajFmtPrintF(cachef,
                                    "ALIAS %S %S\n",
                                    alias,
                                    ensDatabaseadaptorGetSpecies(dba));

                    ajStrDel(&alias);
                }

                ajFmtPrintF(cachef, "\n");
            }

            /* Ensembl Database Adaptor objects *must not* be deleted. */
        }

        ajStrDel(&spname);
    }

    /* All three lists were drained by the pop loops; release the lists. */

    ajListstrFree(&aliases);
    ajListFree(&dbas);
    ajListstrFree(&species);

    ajStrDel(&dbcurl);
    ajStrDel(&svrurl);
    ajStrDel(&dbname);
    ajStrDel(&svrname);

    ajFileClose(&outf);
    ajFileClose(&cachef);

    embExit();

    return EXIT_SUCCESS;
}