gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
{
    
    
    int nsymbols;
    gk_idx_t i;
    gk_i2cc2i_t *t;

    nsymbols = strlen(alphabet);
    t        = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
    t->n     = nsymbols;
    t->i2c   = gk_cmalloc(256, "gk_i2c_create_common");
    t->c2i   = gk_imalloc(256, "gk_i2c_create_common");
    

    gk_cset(256, -1, t->i2c);
    gk_iset(256, -1, t->c2i);
    
    for(i=0;i<nsymbols;i++){
	t->i2c[i] = alphabet[i];
	t->c2i[(int)alphabet[i]] = i;
    }

    return t;

}
Example #2
0
/*************************************************************************
* This function reads the spd matrix
**************************************************************************/
void ReadCoordinates(GraphType *graph, char *filename)
{
  idxtype i, j, k, l, nvtxs, fmt, readew, readvw, ncon, edge, ewgt;
  FILE *fpin;
  char *line;


  fpin = gk_fopen(filename, "r", __func__);

  nvtxs = graph->nvtxs;

  graph->coords = gk_dsmalloc(3*nvtxs, 0.0, "ReadCoordinates: coords");

  line = gk_cmalloc(MAXLINE+1, "ReadCoordinates: line");

  for (i=0; i<nvtxs; i++) {
    fgets(line, MAXLINE, fpin);
    msscanf(line, "%lf %lf %lf", graph->coords+3*i+0, graph->coords+3*i+1, graph->coords+3*i+2);
  }
    
  gk_fclose(fpin);

  gk_free((void **)&line, LTERM);
}
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
{
    gk_seq_t *seq;
    gk_idx_t i, j, ii;
    size_t ntokens, nbytes, len;
    FILE *fpin;
    
    
    gk_Tokens_t tokens;
    static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
    static int PSSMWIDTH = 20;
    char *header, line[MAXLINELEN];
    gk_i2cc2i_t *converter;

    header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
    
    converter = gk_i2cc2i_create_common(AAORDER);
    
    gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
    len --;

    seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
    gk_seq_init(seq);
    
    seq->len = len;
    seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
    seq->pssm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
    seq->psfm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
    
    seq->nsymbols = PSSMWIDTH;
    seq->name     = gk_getbasename(filename);
    
    fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");


    /* Read the header line */
    if (fgets(line, MAXLINELEN-1, fpin) == NULL)
      errexit("Unexpected end of file: %s\n", filename);
    gk_strtoupper(line);
    gk_strtokenize(line, " \t\n", &tokens);

    for (i=0; i<PSSMWIDTH; i++)
	header[i] = tokens.list[i][0];
    
    gk_freetokenslist(&tokens);
    

    /* Read the rest of the lines */
    for (i=0, ii=0; ii<len; ii++) {
	if (fgets(line, MAXLINELEN-1, fpin) == NULL)
          errexit("Unexpected end of file: %s\n", filename);
	gk_strtoupper(line);
	gk_strtokenize(line, " \t\n", &tokens);
	
	seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
	
	for (j=0; j<PSSMWIDTH; j++) {
	    seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
	    seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
	}
	
      
	
	gk_freetokenslist(&tokens);
	i++;
    }
    
    seq->len = i; /* Reset the length if certain characters were skipped */
    
    gk_free((void **)&header, LTERM);
    gk_fclose(fpin);

    return seq;
}
Example #4
0
int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
      char **new_str)
{
  gk_idx_t i; 
  int j, rc, flags, global, nmatches;
  size_t len, rlen, nlen, offset, noffset;
  regex_t re;
  regmatch_t matches[10];

  
  /* Parse the options */
  flags = REG_EXTENDED;
  if (strchr(options, 'i') != NULL)
    flags = flags | REG_ICASE;
  global = (strchr(options, 'g') != NULL ? 1 : 0);


  /* Compile the regex */
  if ((rc = regcomp(&re, pattern, flags)) != 0) { 
    len = regerror(rc, &re, NULL, 0);
    *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
    regerror(rc, &re, *new_str, len);
    return 0;
  }

  /* Prepare the output string */
  len = strlen(str);
  nlen = 2*len;
  noffset = 0;
  *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");


  /* Get into the matching-replacing loop */
  rlen = strlen(replacement);
  offset = 0;
  nmatches = 0;
  do {
    rc = regexec(&re, str+offset, 10, matches, 0);

    if (rc == REG_ESPACE) {
      gk_free((void **)new_str, LTERM);
      *new_str = gk_strdup("regexec ran out of memory.");
      regfree(&re);
      return 0;
    }
    else if (rc == REG_NOMATCH) {
      if (nlen-noffset < len-offset) {
        nlen += (len-offset) - (nlen-noffset);
        *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
      }
      strcpy(*new_str+noffset, str+offset);
      noffset += (len-offset);
      break;
    }
    else { /* A match was found! */
      nmatches++;

      /* Copy the left unmatched portion of the string */
      if (matches[0].rm_so > 0) {
        if (nlen-noffset < matches[0].rm_so) {
          nlen += matches[0].rm_so - (nlen-noffset);
          *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
        }
        strncpy(*new_str+noffset, str+offset, matches[0].rm_so);
        noffset += matches[0].rm_so;
      }

      /* Go and append the replacement string */
      for (i=0; i<rlen; i++) {
        switch (replacement[i]) {
          case '\\':
            if (i+1 < rlen) {
              if (nlen-noffset < 1) {
                nlen += nlen + 1;
                *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
              }
              *new_str[noffset++] = replacement[++i];
            }
            else {
              gk_free((void **)new_str, LTERM);
              *new_str = gk_strdup("Error in replacement string. Missing character following '\'.");
              regfree(&re);
              return 0;
            }
            break;

          case '$':
            if (i+1 < rlen) {
              j = (int)(replacement[++i] - '0');
              if (j < 0 || j > 9) {
                gk_free((void **)new_str, LTERM);
                *new_str = gk_strdup("Error in captured subexpression specification.");
                regfree(&re);
                return 0;
              }

              if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) {
                nlen += nlen + (matches[j].rm_eo-matches[j].rm_so);
                *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
              }

              strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo);
              noffset += matches[j].rm_eo-matches[j].rm_so;
            }
            else {
              gk_free((void **)new_str, LTERM);
              *new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'.");
              regfree(&re);
              return 0;
            }
            break;

          default:
            if (nlen-noffset < 1) {
              nlen += nlen + 1;
              *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
            }
            (*new_str)[noffset++] = replacement[i];
        }
      }

      /* Update the offset of str for the next match */
      offset += matches[0].rm_eo;

      if (!global) {
        /* Copy the right portion of the string if no 'g' option */
        if (nlen-noffset < len-offset) {
          nlen += (len-offset) - (nlen-noffset);
          *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
        }
        strcpy(*new_str+noffset, str+offset);
        noffset += (len-offset);
      }
    }
  } while (global);

  (*new_str)[noffset] = '\0';

  regfree(&re);
  return nmatches + 1;

}
Example #5
0
/*************************************************************************
* This function reads the spd matrix
**************************************************************************/
void ReadGraph(GraphType *graph, char *filename, idxtype *wgtflag)
{
  idxtype i, j, k, l, fmt, readew, readvw, ncon, edge, ewgt;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt;
  char *line, *oldstr, *newstr;
  FILE *fpin;

  InitGraph(graph);

  line = gk_cmalloc(MAXLINE+1, "ReadGraph: line");

  fpin = gk_fopen(filename, "r", __func__);

  do {
    fgets(line, MAXLINE, fpin);
  } while (line[0] == '%' && !feof(fpin));

  if (feof(fpin)) {
    graph->nvtxs = 0;
    gk_free((void **)&line, LTERM);
    return;
  }

  fmt = ncon = 0;
  msscanf(line, "%D %D %D %D", &(graph->nvtxs), &(graph->nedges), &fmt, &ncon);


  readew = (fmt%10 > 0);
  readvw = ((fmt/10)%10 > 0);
  if (fmt >= 100) {
    mprintf("Cannot read this type of file format!");
    exit(0);
  }


  *wgtflag = 0;
  if (readew)
    *wgtflag += 1;
  if (readvw)
    *wgtflag += 2;

  if (ncon > 0 && !readvw) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("***  I detected an error in your input file  ***\n\n");
    mprintf("You specified ncon=%D, but the fmt parameter does not specify vertex weights\n", ncon);
    mprintf("Make sure that the fmt parameter is set to either 10 or 11.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  graph->nedges *=2;
  ncon = graph->ncon = (ncon == 0 ? 1 : ncon);

  /*mprintf("%D %D %D %D %D [%D %D]\n", fmt, fmt%10, (fmt/10)%10, ncon, graph->ncon, readew, readvw);*/

  if (graph->nvtxs > MAXIDX) 
    errexit("\nThe matrix is too big: %d [%d %d]\n", graph->nvtxs, MAXIDX, sizeof(idxtype));

  xadj = graph->xadj = idxsmalloc(graph->nvtxs+1, 0, "ReadGraph: xadj");
  adjncy = graph->adjncy = idxmalloc(graph->nedges, "ReadGraph: adjncy");

  vwgt = graph->vwgt = (readvw ? idxmalloc(ncon*graph->nvtxs, "ReadGraph: vwgt") : NULL);
  adjwgt = graph->adjwgt = (readew ? idxmalloc(graph->nedges, "ReadGraph: adjwgt") : NULL);

  /* Start reading the graph file */
  for (xadj[0]=0, k=0, i=0; i<graph->nvtxs; i++) {
    do {
      fgets(line, MAXLINE, fpin);
    } while (line[0] == '%' && !feof(fpin));
    oldstr = line;
    newstr = NULL;

    if (strlen(line) == MAXLINE) 
      errexit("\nBuffer for fgets not big enough!\n");

    if (readvw) {
      for (l=0; l<ncon; l++) {
        vwgt[i*ncon+l] = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }
    }

    for (;;) {
      edge = strtoidx(oldstr, &newstr, 10) -1;
      oldstr = newstr;

      if (readew) {
        ewgt = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }

      if (edge < 0)
        break;

      adjncy[k] = edge;
      if (readew) 
        adjwgt[k] = ewgt;
      k++;
    } 
    xadj[i+1] = k;
  }

  gk_fclose(fpin);

  if (k != graph->nedges) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("***  I detected an error in your input file  ***\n\n");
    mprintf("In the first line of the file, you specified that the graph contained\n%D edges. However, I only found %D edges in the file.\n", graph->nedges/2, k/2);
    if (2*k == graph->nedges) {
      mprintf("\n *> I detected that you specified twice the number of edges that you have in\n");
      mprintf("    the file. Remember that the number of edges specified in the first line\n");
      mprintf("    counts each edge between vertices v and u only once.\n\n");
    }
    mprintf("Please specify the correct number of edges in the first line of the file.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  gk_free((void **)&line, LTERM);
}