/*************************************************************************
* Builds a two-way lookup table between the characters of an alphabet and
* their positions in it.
*
* \param alphabet  NUL-terminated string listing the symbols in index order.
* \returns a newly allocated gk_i2cc2i_t in which
*          - n      is the number of symbols stored,
*          - i2c[k] is the k-th character of the alphabet,
*          - c2i[c] is the position of character c in the alphabet.
*          Both tables have 256 entries; entries that do not correspond to
*          an alphabet symbol are set to -1. The caller owns the structure
*          and its two tables.
*
* c2i is filled using the character's value as an unsigned char, so look-ups
* should index it the same way.
**************************************************************************/
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
{
  int nsymbols;
  gk_idx_t i;
  gk_i2cc2i_t *t;

  nsymbols = strlen(alphabet);

  /* The tables only have 256 slots; never write past them */
  if (nsymbols > 256)
    nsymbols = 256;

  t      = gk_malloc(sizeof(gk_i2cc2i_t), "gk_i2c_create_common");
  t->n   = nsymbols;
  t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
  t->c2i = gk_imalloc(256, "gk_i2c_create_common");

  /* Mark every slot as "not part of the alphabet" */
  gk_cset(256, -1, t->i2c);
  gk_iset(256, -1, t->c2i);

  for (i=0; i<nsymbols; i++) {
    t->i2c[i] = alphabet[i];
    /* Go through unsigned char: a plain char may be negative for bytes
       >= 0x80, which would index before the start of the table */
    t->c2i[(unsigned char)alphabet[i]] = i;
  }

  return t;
}
/*************************************************************************
* This function reads the coordinates of the vertices of a graph.
*
* The file is expected to contain one line per vertex (graph->nvtxs lines),
* each holding three floating-point values that are parsed with "%lf".
* The values are stored consecutively in graph->coords, i.e., the three
* values of vertex i end up in coords[3*i], coords[3*i+1] and coords[3*i+2].
* graph->coords is allocated here (zero-filled) and is owned by the graph.
*
* The routine aborts via errexit() if the file has fewer lines than the
* graph has vertices.
**************************************************************************/
void ReadCoordinates(GraphType *graph, char *filename)
{
  idxtype i, nvtxs;
  FILE *fpin;
  char *line;

  fpin = gk_fopen(filename, "r", __func__);

  nvtxs = graph->nvtxs;

  graph->coords = gk_dsmalloc(3*nvtxs, 0.0, "ReadCoordinates: coords");

  line = gk_cmalloc(MAXLINE+1, "ReadCoordinates: line");

  for (i=0; i<nvtxs; i++) {
    /* Without this check a short file would make msscanf() parse a stale
       (or, on the first line, uninitialized) buffer */
    if (fgets(line, MAXLINE, fpin) == NULL)
      errexit("Unexpected end of file: %s\n", filename);

    msscanf(line, "%lf %lf %lf", graph->coords+3*i+0, graph->coords+3*i+1, graph->coords+3*i+2);
  }

  gk_fclose(fpin);

  gk_free((void **)&line, LTERM);
}
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename) { gk_seq_t *seq; gk_idx_t i, j, ii; size_t ntokens, nbytes, len; FILE *fpin; gk_Tokens_t tokens; static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*"; static int PSSMWIDTH = 20; char *header, line[MAXLINELEN]; gk_i2cc2i_t *converter; header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header"); converter = gk_i2cc2i_create_common(AAORDER); gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes); len --; seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM"); gk_seq_init(seq); seq->len = len; seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM"); seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); seq->nsymbols = PSSMWIDTH; seq->name = gk_getbasename(filename); fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM"); /* Read the header line */ if (fgets(line, MAXLINELEN-1, fpin) == NULL) errexit("Unexpected end of file: %s\n", filename); gk_strtoupper(line); gk_strtokenize(line, " \t\n", &tokens); for (i=0; i<PSSMWIDTH; i++) header[i] = tokens.list[i][0]; gk_freetokenslist(&tokens); /* Read the rest of the lines */ for (i=0, ii=0; ii<len; ii++) { if (fgets(line, MAXLINELEN-1, fpin) == NULL) errexit("Unexpected end of file: %s\n", filename); gk_strtoupper(line); gk_strtokenize(line, " \t\n", &tokens); seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]]; for (j=0; j<PSSMWIDTH; j++) { seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]); seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]); } gk_freetokenslist(&tokens); i++; } seq->len = i; /* Reset the length if certain characters were skipped */ gk_free((void **)&header, LTERM); gk_fclose(fpin); return seq; }
int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str) { gk_idx_t i; int j, rc, flags, global, nmatches; size_t len, rlen, nlen, offset, noffset; regex_t re; regmatch_t matches[10]; /* Parse the options */ flags = REG_EXTENDED; if (strchr(options, 'i') != NULL) flags = flags | REG_ICASE; global = (strchr(options, 'g') != NULL ? 1 : 0); /* Compile the regex */ if ((rc = regcomp(&re, pattern, flags)) != 0) { len = regerror(rc, &re, NULL, 0); *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str"); regerror(rc, &re, *new_str, len); return 0; } /* Prepare the output string */ len = strlen(str); nlen = 2*len; noffset = 0; *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str"); /* Get into the matching-replacing loop */ rlen = strlen(replacement); offset = 0; nmatches = 0; do { rc = regexec(&re, str+offset, 10, matches, 0); if (rc == REG_ESPACE) { gk_free((void **)new_str, LTERM); *new_str = gk_strdup("regexec ran out of memory."); regfree(&re); return 0; } else if (rc == REG_NOMATCH) { if (nlen-noffset < len-offset) { nlen += (len-offset) - (nlen-noffset); *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); } strcpy(*new_str+noffset, str+offset); noffset += (len-offset); break; } else { /* A match was found! 
*/ nmatches++; /* Copy the left unmatched portion of the string */ if (matches[0].rm_so > 0) { if (nlen-noffset < matches[0].rm_so) { nlen += matches[0].rm_so - (nlen-noffset); *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); } strncpy(*new_str+noffset, str+offset, matches[0].rm_so); noffset += matches[0].rm_so; } /* Go and append the replacement string */ for (i=0; i<rlen; i++) { switch (replacement[i]) { case '\\': if (i+1 < rlen) { if (nlen-noffset < 1) { nlen += nlen + 1; *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); } *new_str[noffset++] = replacement[++i]; } else { gk_free((void **)new_str, LTERM); *new_str = gk_strdup("Error in replacement string. Missing character following '\'."); regfree(&re); return 0; } break; case '$': if (i+1 < rlen) { j = (int)(replacement[++i] - '0'); if (j < 0 || j > 9) { gk_free((void **)new_str, LTERM); *new_str = gk_strdup("Error in captured subexpression specification."); regfree(&re); return 0; } if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) { nlen += nlen + (matches[j].rm_eo-matches[j].rm_so); *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); } strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo); noffset += matches[j].rm_eo-matches[j].rm_so; } else { gk_free((void **)new_str, LTERM); *new_str = gk_strdup("Error in replacement string. 
Missing subexpression number folloing '$'."); regfree(&re); return 0; } break; default: if (nlen-noffset < 1) { nlen += nlen + 1; *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); } (*new_str)[noffset++] = replacement[i]; } } /* Update the offset of str for the next match */ offset += matches[0].rm_eo; if (!global) { /* Copy the right portion of the string if no 'g' option */ if (nlen-noffset < len-offset) { nlen += (len-offset) - (nlen-noffset); *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); } strcpy(*new_str+noffset, str+offset); noffset += (len-offset); } } } while (global); (*new_str)[noffset] = '\0'; regfree(&re); return nmatches + 1; }
/*************************************************************************
* This function reads a graph from a text file.
*
* Lines that start with '%' are comments and are skipped. The first
* non-comment line is the header "nvtxs nedges [fmt [ncon]]", where the ones
* digit of fmt says whether edge weights are present and the tens digit
* whether vertex weights are present. It is followed by one line per vertex
* that lists the vertex's ncon weights (if present) and then its adjacent
* vertices (1-based), each followed by the edge weight (if present).
*
* On return graph->xadj/adjncy (and vwgt/adjwgt when present) hold the graph
* in compressed adjacency form with 0-based vertex numbers, graph->nedges is
* twice the number given in the header, and *wgtflag has bit 0 set if edge
* weights were read and bit 1 set if vertex weights were read.
*
* If the file has no header line, graph->nvtxs is set to 0 and the routine
* returns without touching *wgtflag. Inconsistent input is reported and the
* program exits.
**************************************************************************/
void ReadGraph(GraphType *graph, char *filename, idxtype *wgtflag)
{
  idxtype i, k, l, fmt, readew, readvw, ncon, edge, ewgt;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt;
  char *line, *oldstr, *newstr;
  size_t linelen;
  int gotheader;
  FILE *fpin;

  InitGraph(graph);

  line = gk_cmalloc(MAXLINE+1, "ReadGraph: line");

  fpin = gk_fopen(filename, "r", __func__);

  /* Skip the leading comment lines and stop at the header line */
  gotheader = 0;
  while (fgets(line, MAXLINE, fpin) != NULL) {
    if (line[0] != '%') {
      gotheader = 1;
      break;
    }
  }

  if (!gotheader) {
    graph->nvtxs = 0;
    gk_fclose(fpin);
    gk_free((void **)&line, LTERM);
    return;
  }

  fmt = ncon = 0;
  msscanf(line, "%D %D %D %D", &(graph->nvtxs), &(graph->nedges), &fmt, &ncon);

  readew = (fmt%10 > 0);
  readvw = ((fmt/10)%10 > 0);
  if (fmt >= 100) {
    mprintf("Cannot read this type of file format!");
    exit(0);
  }

  *wgtflag = 0;
  if (readew)
    *wgtflag += 1;
  if (readvw)
    *wgtflag += 2;

  if (ncon > 0 && !readvw) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("*** I detected an error in your input file ***\n\n");
    mprintf("You specified ncon=%D, but the fmt parameter does not specify vertex weights\n", ncon);
    mprintf("Make sure that the fmt parameter is set to either 10 or 11.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  /* Each undirected edge is stored twice, once per endpoint */
  graph->nedges *=2;
  ncon = graph->ncon = (ncon == 0 ? 1 : ncon);

  if (graph->nvtxs > MAXIDX)
    errexit("\nThe matrix is too big: %d [%d %d]\n", graph->nvtxs, MAXIDX, (int)sizeof(idxtype));

  xadj   = graph->xadj   = idxsmalloc(graph->nvtxs+1, 0, "ReadGraph: xadj");
  adjncy = graph->adjncy = idxmalloc(graph->nedges, "ReadGraph: adjncy");

  vwgt   = graph->vwgt   = (readvw ? idxmalloc(ncon*graph->nvtxs, "ReadGraph: vwgt") : NULL);
  adjwgt = graph->adjwgt = (readew ? idxmalloc(graph->nedges, "ReadGraph: adjwgt") : NULL);

  /* Start reading the graph file */
  ewgt = 0;
  for (xadj[0]=0, k=0, i=0; i<graph->nvtxs; i++) {
    /* Fetch the next non-comment line. If the file ends early the vertex
       is treated as having an empty line instead of re-parsing whatever
       was left in the buffer; missing edges are caught by the edge-count
       check after the loop. */
    for (;;) {
      if (fgets(line, MAXLINE, fpin) == NULL) {
        line[0] = '\0';
        break;
      }
      if (line[0] != '%')
        break;
    }

    oldstr = line;
    newstr = NULL;

    /* fgets() stores at most MAXLINE-1 characters; a full buffer that does
       not end in a newline (and is not the end of the file) means that the
       line was truncated */
    linelen = strlen(line);
    if (linelen == (size_t)(MAXLINE-1) && line[linelen-1] != '\n' && !feof(fpin))
      errexit("\nBuffer for fgets not big enough!\n");

    if (readvw) {
      for (l=0; l<ncon; l++) {
        vwgt[i*ncon+l] = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }
    }

    for (;;) {
      /* At the end of the line nothing is converted, the result is 0 and
         edge becomes -1, which terminates the adjacency list */
      edge = strtoidx(oldstr, &newstr, 10) -1;
      oldstr = newstr;

      if (readew) {
        ewgt = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }

      if (edge < 0)
        break;

      /* Keep counting but never store beyond the arrays that were sized
         from the header; the mismatch is reported after the loop */
      if (k < graph->nedges) {
        adjncy[k] = edge;
        if (readew)
          adjwgt[k] = ewgt;
      }
      k++;
    }
    xadj[i+1] = k;
  }

  gk_fclose(fpin);

  if (k != graph->nedges) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("*** I detected an error in your input file ***\n\n");
    mprintf("In the first line of the file, you specified that the graph contained\n%D edges. However, I only found %D edges in the file.\n", graph->nedges/2, k/2);
    if (2*k == graph->nedges) {
      mprintf("\n *> I detected that you specified twice the number of edges that you have in\n");
      mprintf(" the file. Remember that the number of edges specified in the first line\n");
      mprintf(" counts each edge between vertices v and u only once.\n\n");
    }
    mprintf("Please specify the correct number of edges in the first line of the file.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  gk_free((void **)&line, LTERM);
}