/*************************************************************************
* Dumps the two partition vectors of a mesh, one entry per line:
*   <fname>.epart.<nparts>  receives the ne entries of epart
*   <fname>.npart.<nparts>  receives the nn entries of npart
**************************************************************************/
void WriteMeshPartition(char *fname, idxtype nparts, idxtype ne, idxtype *epart,
                        idxtype nn, idxtype *npart)
{
  char path[256];
  FILE *out;
  idxtype pos;

  /* element partition */
  msprintf(path, "%s.epart.%D", fname, nparts);
  out = gk_fopen(path, "w", __func__);
  pos = 0;
  while (pos < ne) {
    fprintf(out, "%" PRIIDX "\n", epart[pos]);
    pos++;
  }
  gk_fclose(out);

  /* node partition */
  msprintf(path, "%s.npart.%D", fname, nparts);
  out = gk_fopen(path, "w", __func__);
  pos = 0;
  while (pos < nn) {
    fprintf(out, "%" PRIIDX "\n", npart[pos]);
    pos++;
  }
  gk_fclose(out);
}
/*************************************************************************
* Reads the element-node array of a mixed mesh whose elements carry weights.
*
* The file starts with the number of elements followed by one more integer
* (stored in *nn and overwritten before returning). Each element record is
* its type, its weight and then its node ids. Node ids are decremented by
* one as they are stored.
*
*   filename  mesh file to read
*   ne        out: number of elements
*   nn        out: largest stored node id plus one (0 if no node was read)
*   etype     out: type of every element; needs one slot per element
*   vwgt      out: weight of every element; needs one slot per element
*
* Returns the concatenated node lists of all elements. The array is
* allocated with idxmalloc using 8 slots per element.
**************************************************************************/
idxtype *ReadMixedMeshWgt(char *filename, idxtype *ne, idxtype *nn, idxtype *etype, idxtype *vwgt)
{
  idxtype i, j, k;
  idxtype *elmnts;
  FILE *fpin;
  /* number of nodes per element, indexed by element type (types 1..5) */
  idxtype sizes[]={-1,3,4,8,4,2};

  fpin = gk_fopen(filename, "r", __func__);

  mfscanf(fpin, "%D", ne);
  mfscanf(fpin, "%D", nn);

  elmnts = idxmalloc(8*(*ne), "ReadMixedMeshWgt: elmnts");

  for (j=0, i=0; i<*ne; i++) {
    mfscanf(fpin, "%D", etype+i);
    mfscanf(fpin, "%D", vwgt+i);

    /* the type comes straight from the file and is used to index sizes[] */
    if (etype[i] < 1 || etype[i] > 5)
      errexit("Unknown mesh-element type: %d\n", (int)etype[i]);

    for (k=0; k<sizes[etype[i]]; k++) {
      mfscanf(fpin, "%D", elmnts+j);
      elmnts[j++]--;
    }
  }

  gk_fclose(fpin);

  /* nothing to scan when no node was read */
  *nn = (j > 0 ? elmnts[idxargmax(j, elmnts)]+1 : 0);

  return elmnts;
}
/*************************************************************************
* Reads a text file into an array of 64-bit integers, one value per line.
*
*   fname     file to read
*   r_nlines  if not NULL, receives the number of lines that were read
*
* Returns an array allocated with gk_i64malloc, or NULL when the file
* yields no lines. A line that sscanf cannot convert leaves its slot
* unassigned.
**************************************************************************/
int64_t *gk_i64readfile(char *fname, gk_idx_t *r_nlines)
{
  size_t lnlen, nlines, maxlines;
  char *line=NULL;
  int64_t *array=NULL;
  FILE *fpin;

  gk_getfilestats(fname, &nlines, NULL, NULL, NULL);

  /* Reserve one slot beyond the reported count: gk_getline also returns a
     final line that has no trailing newline, and such a line may not be
     part of the count. */
  maxlines = nlines+1;
  array = gk_i64malloc(maxlines, "gk_i64readfile: array");

  fpin = gk_fopen(fname, "r", "gk_readfile");
  nlines = 0;
  /* never store more values than were allocated */
  while (nlines < maxlines && gk_getline(&line, &lnlen, fpin) != -1) {
    sscanf(line, "%" SCNd64, &array[nlines++]);
  }
  gk_fclose(fpin);

  /* keep the "no data -> NULL" contract */
  if (nlines == 0)
    gk_free((void **)&array, LTERM);

  gk_free((void **)&line, LTERM);

  if (r_nlines != NULL)
    *r_nlines = nlines;

  return array;
}
/*************************************************************************
* Writes a multi-constraint graph into the file moc.graph.<nvtxs>.<ncon>.
*
* The header line is "<nvtxs> <xadj[nvtxs]/2> 10 1 <ncon>". It is followed
* by one line per vertex holding its ncon weights (each nvwgt entry scaled
* by 10e6 and truncated to an integer) and then its adjacency list with
* every neighbor index incremented by one.
**************************************************************************/
void WriteMocGraph(GraphType *graph)
{
  idxtype i, j, nvtxs, ncon;
  idxtype *xadj, *adjncy;
  float *nvwgt;
  char filename[256];
  FILE *fpout;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  nvwgt = graph->nvwgt;

  msprintf(filename, "moc.graph.%D.%D", nvtxs, ncon);

  fpout = gk_fopen(filename, "w", __func__);

  mfprintf(fpout, "%D %D 10 1 %D", nvtxs, xadj[nvtxs]/2, ncon);

  for (i=0; i<nvtxs; i++) {
    mfprintf(fpout, "\n");

    /* the argument type has to match the PRIIDX conversion, so the scaled
       weight is converted to idxtype rather than int */
    for (j=0; j<ncon; j++)
      fprintf(fpout, "%" PRIIDX " ", (idxtype)((float)10e6*nvwgt[i*ncon+j]));

    for (j=xadj[i]; j<xadj[i+1]; j++)
      fprintf(fpout, " %" PRIIDX, adjncy[j]+1);
  }

  gk_fclose(fpout);
}
/************************************************************************* * This function gets some basic statistics about the file **************************************************************************/ void gk_getfilestats(char *fname, int *r_nlines, int *r_ntokens, int *r_nbytes) { int nlines, ntokens, nbytes; size_t lnlen; FILE *fpin; char *line=NULL, delim[] = " \t", *token; fpin = gk_fopen(fname, "r", "gk_GetFileStats"); nlines = ntokens = nbytes = 0; while (gk_getline(&line, &lnlen, fpin)) { nlines++; nbytes += strlen(line); token = strtok(line, delim); while (token) { ntokens++; token = strtok(NULL, delim); } } gk_fclose(fpin); *r_nlines = nlines; *r_ntokens = ntokens; *r_nbytes = nbytes; gk_free((void *)&line, LTERM); }
/*************************************************************************
* Writes one "ATOM" record per residue of p into fname. Each record uses
* the residue index as serial and residue number, the fixed atom name "CA",
* the residue's three-letter name and its center-of-mass coordinates,
* followed by the constants 1.0 and -37.0.
**************************************************************************/
void gk_writecentersofmass(pdbf *p, char *fname)
{
  FILE *out;
  int res;

  out = gk_fopen(fname, "w", fname);

  res = 0;
  while (res < p->nresidues) {
    fprintf(out, "%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
        "ATOM ", res, "CA", ' ', p->threeresSeq[res], ' ', res, ' ',
        p->cm[res].x, p->cm[res].y, p->cm[res].z,
        1.0, -37.0);
    res++;
  }

  fclose(out);
}
/*************************************************************************
* Writes one "ATOM" record into fname for each of the p->ncas atoms
* referenced by p->cas, in array order.
**************************************************************************/
void gk_writealphacarbons(pdbf *p, char *fname)
{
  FILE *out;
  int ca;

  out = gk_fopen(fname, "w", fname);

  ca = 0;
  while (ca < p->ncas) {
    fprintf(out, "%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
        "ATOM ",
        p->cas[ca]->serial, p->cas[ca]->name, p->cas[ca]->altLoc,
        p->cas[ca]->resname, p->cas[ca]->chainid, p->cas[ca]->rserial,
        p->cas[ca]->icode,
        p->cas[ca]->x, p->cas[ca]->y, p->cas[ca]->z,
        p->cas[ca]->opcy, p->cas[ca]->tmpt);
    ca++;
  }

  fclose(out);
}
/*************************************************************************
* Writes one "ATOM" record into fname for each of the p->natoms entries
* of p->atoms, in array order.
**************************************************************************/
void gk_writefullatom(pdbf *p, char *fname)
{
  FILE *out;
  int a;

  out = gk_fopen(fname, "w", fname);

  a = 0;
  while (a < p->natoms) {
    fprintf(out, "%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
        "ATOM ",
        p->atoms[a].serial, p->atoms[a].name, p->atoms[a].altLoc,
        p->atoms[a].resname, p->atoms[a].chainid, p->atoms[a].rserial,
        p->atoms[a].icode,
        p->atoms[a].x, p->atoms[a].y, p->atoms[a].z,
        p->atoms[a].opcy, p->atoms[a].tmpt);
    a++;
  }

  fclose(out);
}
/*************************************************************************
* Writes a graph in METIS format.
*
*   graph     graph to write
*   filename  output file; when NULL the graph goes to stdout
*   format    must be GK_GRAPH_FMT_METIS, anything else is reported via
*             gk_errexit
*
* The header holds the vertex count and xadj[nvtxs]/2. When vertex sizes,
* vertex weights or edge weights are present, three 0/1 digits follow in
* that order. Each vertex line lists the optional size, the optional
* weight, and then every neighbor (incremented by one) with its optional
* edge weight. Integer arrays take precedence over float arrays.
**************************************************************************/
void gk_graph_Write(gk_graph_t *graph, char *filename, int format)
{
  FILE *out;
  ssize_t v, e;
  int withvsizes, withvwgts, withewgts;

  if (format != GK_GRAPH_FMT_METIS)
    gk_errexit(SIGERR, "Unknown file format. %d\n", format);

  out = (filename ? gk_fopen(filename, "w", "gk_graph_Write: fpout") : stdout);

  withvsizes = (graph->ivsizes || graph->fvsizes);
  withvwgts  = (graph->ivwgts || graph->fvwgts);
  withewgts  = (graph->iadjwgt || graph->fadjwgt);

  /* header line */
  fprintf(out, "%d %zd", graph->nvtxs, graph->xadj[graph->nvtxs]/2);
  if (withvsizes || withvwgts || withewgts)
    fprintf(out, " %d%d%d", withvsizes, withvwgts, withewgts);
  fprintf(out, "\n");

  /* one line per vertex */
  for (v=0; v<graph->nvtxs; v++) {
    if (graph->ivsizes)
      fprintf(out, " %d", graph->ivsizes[v]);
    else if (graph->fvsizes)
      fprintf(out, " %f", graph->fvsizes[v]);

    if (graph->ivwgts)
      fprintf(out, " %d", graph->ivwgts[v]);
    else if (graph->fvwgts)
      fprintf(out, " %f", graph->fvwgts[v]);

    for (e=graph->xadj[v]; e<graph->xadj[v+1]; e++) {
      fprintf(out, " %d", graph->adjncy[e]+1);

      if (graph->iadjwgt)
        fprintf(out, " %d", graph->iadjwgt[e]);
      else if (graph->fadjwgt)
        fprintf(out, " %f", graph->fadjwgt[e]);
    }

    fprintf(out, "\n");
  }

  if (filename)
    gk_fclose(out);
}
/*************************************************************************
* Returns the first integer stored in a mixed-mesh file, i.e. its number
* of elements. Returns 0 when no integer could be read.
**************************************************************************/
idxtype MixedElements(char *filename)
{
  idxtype ne = 0;   /* defined result even if the scan below fails */
  FILE *fpin;

  fpin = gk_fopen(filename, "r", __func__);
  mfscanf(fpin, "%D", &ne);
  gk_fclose(fpin);

  return ne;
}
/*************************************************************************
* Writes the one-letter residue sequence of pb into fname: a "> <fname>"
* header line followed by the pb->nresidues characters of pb->resSeq on a
* single line.
**************************************************************************/
void gk_writefastafrompdb(pdbf *pb, char *fname)
{
  FILE *out;
  int res;

  out = gk_fopen(fname, "w", fname);

  fprintf(out, "> %s\n", fname);

  res = 0;
  while (res < pb->nresidues) {
    fputc(pb->resSeq[res], out);
    res++;
  }
  fputc('\n', out);

  fclose(out);
}
/*************************************************************************
* Gathers basic statistics about a file. Every output pointer may be NULL.
*
*   fname            file to scan
*   r_nlines         number of newline characters
*   r_ntokens        number of runs of characters that are not space, tab
*                    or newline
*   r_max_nlntokens  largest number of tokens found on a single line
*   r_nbytes         number of bytes read
*
* A final line without a trailing newline contributes its tokens but is
* not counted in r_nlines.
**************************************************************************/
void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens,
        size_t *r_max_nlntokens, size_t *r_nbytes)
{
  size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread, i;
  int intoken=0;
  char buffer[2048];
  FILE *fpin;

  fpin = gk_fopen(fname, "r", "gk_GetFileStats");

  /* Loop on fread's own result: it returns 0 both at end of file and on a
     read error, whereas feof() alone never becomes true after an error. */
  while ((nread = fread(buffer, sizeof(char), sizeof(buffer), fpin)) > 0) {
    nbytes += nread;

    /* walk exactly the bytes that were read, so a NUL byte in the data
       does not cut the scan of this chunk short */
    for (i=0; i<nread; i++) {
      if (buffer[i] == '\n') {
        nlines++;
        ntokens += intoken;
        intoken = 0;
        if (max_nlntokens < ntokens-oldntokens)
          max_nlntokens = ntokens-oldntokens;
        oldntokens = ntokens;
      }
      else if (buffer[i] == ' ' || buffer[i] == '\t') {
        ntokens += intoken;
        intoken = 0;
      }
      else {
        intoken = 1;
      }
    }
  }

  /* account for a token / line that ends at end of file */
  ntokens += intoken;
  if (max_nlntokens < ntokens-oldntokens)
    max_nlntokens = ntokens-oldntokens;

  gk_fclose(fpin);

  if (r_nlines != NULL)
    *r_nlines = nlines;
  if (r_ntokens != NULL)
    *r_ntokens = ntokens;
  if (r_max_nlntokens != NULL)
    *r_max_nlntokens = max_nlntokens;
  if (r_nbytes != NULL)
    *r_nbytes = nbytes;
}
/*************************************************************************
* Reads the element-node array of a single-type mesh whose elements carry
* weights.
*
* The header supplies the element count and the element type, followed by
* one further integer that is read and discarded. Each element record is
* its weight followed by its node ids, which are decremented by one as
* they are stored.
*
*   ne     out: number of elements
*   nn     out: largest stored node id plus one
*   etype  out: element type (1..5, anything else goes to errexit)
*   vwgt   out: weight of every element; needs one slot per element
*
* Returns the concatenated node lists, allocated with idxmalloc.
**************************************************************************/
idxtype *ReadMeshWgt(char *filename, idxtype *ne, idxtype *nn, idxtype *etype, idxtype *vwgt)
{
  FILE *fpin;
  idxtype *elmnts;
  idxtype elem, node, nread, esize, marker;

  fpin = gk_fopen(filename, "r", __func__);

  mfscanf(fpin, "%D %D", ne, etype);
  mfscanf(fpin, "%D", &marker);   /* third header field is not used */

  /* number of nodes per element for each known type */
  if (*etype == 1)
    esize = 3;
  else if (*etype == 2 || *etype == 4)
    esize = 4;
  else if (*etype == 3)
    esize = 8;
  else if (*etype == 5)
    esize = 2;
  else
    errexit("Unknown mesh-element type: %d\n", *etype);

  elmnts = idxmalloc(esize*(*ne), "ReadMeshWgt: elmnts");

  nread = 0;
  for (elem=0; elem<*ne; elem++) {
    mfscanf(fpin, "%D", vwgt+elem);
    for (node=0; node<esize; node++, nread++) {
      mfscanf(fpin, "%D", elmnts+nread);
      elmnts[nread]--;
    }
  }

  gk_fclose(fpin);

  *nn = elmnts[idxargmax(nread, elmnts)]+1;

  return elmnts;
}
/*************************************************************************
* Stores the n entries of a partition vector in <fname>.part.<nparts>,
* one entry per line.
**************************************************************************/
void WritePartition(char *fname, idxtype *part, idxtype n, idxtype nparts)
{
  char path[256];
  FILE *out;
  idxtype pos;

  msprintf(path, "%s.part.%D", fname, nparts);
  out = gk_fopen(path, "w", __func__);

  pos = 0;
  while (pos < n) {
    fprintf(out, "%" PRIIDX "\n", part[pos]);
    pos++;
  }

  gk_fclose(out);
}
/*************************************************************************
* Stores the n entries of the iperm vector in <fname>.iperm, one entry
* per line.
**************************************************************************/
void WritePermutation(char *fname, idxtype *iperm, idxtype n)
{
  char path[256];
  FILE *out;
  idxtype pos;

  msprintf(path, "%s.iperm", fname);
  out = gk_fopen(path, "w", __func__);

  pos = 0;
  while (pos < n) {
    fprintf(out, "%" PRIIDX "\n", iperm[pos]);
    pos++;
  }

  gk_fclose(out);
}
/*************************************************************************
* Writes an unweighted graph into a file.
*
* The first line holds nvtxs and xadj[nvtxs]/2. Each following line lists
* the neighbors of one vertex, every index incremented by one. Lines are
* separated by a newline written before each vertex, so the file does not
* end with one.
**************************************************************************/
void WriteGraph(char *filename, idxtype nvtxs, idxtype *xadj, idxtype *adjncy)
{
  FILE *out;
  idxtype v, e, last;

  out = gk_fopen(filename, "w", __func__);

  mfprintf(out, "%D %D", nvtxs, xadj[nvtxs]/2);

  v = 0;
  while (v < nvtxs) {
    mfprintf(out, "\n");

    last = xadj[v+1];
    for (e=xadj[v]; e<last; e++)
      fprintf(out, " %" PRIIDX, adjncy[e]+1);

    v++;
  }

  gk_fclose(out);
}
/*************************************************************************
* Reads only the element weights of a single-type weighted mesh file.
*
* The header supplies the element count and the element type, followed by
* one further integer that is read and discarded. For every element the
* weight is kept and the node ids that follow it are read and dropped.
*
*   ne     out: number of elements
*   nn     not touched by this routine
*   etype  out: element type (1..5, anything else goes to errexit)
*
* Returns the weights, one per element, allocated with idxmalloc.
**************************************************************************/
idxtype *ReadWgt(char *filename, idxtype *ne, idxtype *nn, idxtype *etype)
{
  FILE *fpin;
  idxtype *vwgt;
  idxtype elem, node, esize, marker, skipped;

  fpin = gk_fopen(filename, "r", __func__);

  mfscanf(fpin, "%D %D", ne, etype);
  mfscanf(fpin, "%D", &marker);   /* third header field is not used */

  /* number of nodes per element for each known type */
  if (*etype == 1)
    esize = 3;
  else if (*etype == 2 || *etype == 4)
    esize = 4;
  else if (*etype == 3)
    esize = 8;
  else if (*etype == 5)
    esize = 2;
  else
    errexit("Unknown mesh-element type: %d\n", *etype);

  vwgt = idxmalloc(*ne, "ReadWgt: vwgt");

  for (elem=0; elem<*ne; elem++) {
    mfscanf(fpin, "%D", vwgt+elem);

    /* consume this element's node ids */
    for (node=0; node<esize; node++)
      mfscanf(fpin, "%D", &skipped);
  }

  gk_fclose(fpin);

  return vwgt;
}
/****************************************************************************
* Detects the type of a mesh file from the space-separated integers on its
* first line.
*
* Returns
*    0  one field:                          mixed elements without weights
*    1  two fields, the second > 0:         fixed elements without weights
*    2  two fields, the second == -1:       mixed elements with weights
*    3  three fields, the third == -1:      fixed elements with weights
*   -1  anything else (including an empty first line)
*
* Only the first 79 characters of the line are examined.
***************************************************************************/
int MeshType(char *filename)
{
  int i, l, cnt=0;
  FILE *fpin;
  char temp[40], inpt[80];
  int firstline[3];

  fpin = gk_fopen(filename, "r", __func__);
  /* bounded read; an empty file leaves an empty string to parse */
  if (fgets(inpt, sizeof(inpt), fpin) == NULL)
    inpt[0] = '\0';
  gk_fclose(fpin);

  /* drop the line terminator if it made it into the buffer */
  for (i=0; inpt[i] != '\0'; i++) {
    if (inpt[i] == '\n') {
      inpt[i] = '\0';
      break;
    }
  }

  i = 0;
  for (;;) {
    while (inpt[i] == ' ')
      i++;
    if (inpt[i] == '\0')
      break;

    /* copy the next field, dropping whatever does not fit into temp */
    for (l=0; inpt[i] != ' ' && inpt[i] != '\0'; i++) {
      if (l < (int)sizeof(temp)-1)
        temp[l++] = inpt[i];
    }
    temp[l] = '\0';

    /* only three values are ever inspected; further fields are counted
       but not stored, which makes the line unrecognized below */
    if (cnt < 3)
      firstline[cnt] = atoi(temp);
    cnt++;
  }

  if (cnt==1)
    return 0;   /* Mixed element without weight */
  else if (cnt==2 && firstline[1]>0)
    return 1;   /* Fixed element without weight */
  else if (cnt==2 && firstline[1]==-1)
    return 2;   /* Mixed element with weight */
  else if (cnt==3 && firstline[2]==-1)
    return 3;   /* Fixed element with weight */

  return -1;    /* unrecognized first line */
}
/************************************************************************
* Reads a 6x6 table of integers, stored row by row in a 36-entry array.
* Row 0 and column 0 are filled with -1. The remaining 25 entries are read
* from the file in row-major order.
*
* Returns the table, allocated with idxmalloc.
**************************************************************************/
idxtype *ReadMgcnums(char *filename)
{
  FILE *fpin;
  idxtype *mgc;
  idxtype row, col, pos;

  fpin = gk_fopen(filename, "r", __func__);

  mgc = idxmalloc(36, "Readmgcnums: mgcnums");

  for (pos=0, row=0; row<6; row++) {
    for (col=0; col<6; col++, pos++) {
      if (row == 0 || col == 0)
        mgc[pos] = -1;
      else
        mfscanf(fpin, "%D", mgc+pos);
    }
  }

  gk_fclose(fpin);

  return mgc;
}
/*************************************************************************
* Reads the coordinates of the vertices of a graph.
*
* The file is expected to hold one line per vertex with up to three
* floating-point values. They are stored in graph->coords, a freshly
* allocated array of 3*nvtxs doubles initialized to 0.0. Values missing
* on a line therefore stay 0.0.
*
* A file with fewer than graph->nvtxs lines is reported through errexit.
**************************************************************************/
void ReadCoordinates(GraphType *graph, char *filename)
{
  idxtype i, nvtxs;
  FILE *fpin;
  char *line;

  fpin = gk_fopen(filename, "r", __func__);

  nvtxs = graph->nvtxs;

  graph->coords = gk_dsmalloc(3*nvtxs, 0.0, "ReadCoordinates: coords");

  line = gk_cmalloc(MAXLINE+1, "ReadCoordinates: line");

  for (i=0; i<nvtxs; i++) {
    /* without this check a short file would make the scan below parse the
       previous line again (or an uninitialized buffer on the first line) */
    if (fgets(line, MAXLINE, fpin) == NULL)
      errexit("ReadCoordinates: premature end of file: %s\n", filename);

    msscanf(line, "%lf %lf %lf", graph->coords+3*i+0, graph->coords+3*i+1, graph->coords+3*i+2);
  }

  gk_fclose(fpin);

  gk_free((void **)&line, LTERM);
}
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename) { gk_seq_t *seq; gk_idx_t i, j, ii; size_t ntokens, nbytes, len; FILE *fpin; gk_Tokens_t tokens; static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*"; static int PSSMWIDTH = 20; char *header, line[MAXLINELEN]; gk_i2cc2i_t *converter; header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header"); converter = gk_i2cc2i_create_common(AAORDER); gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes); len --; seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM"); gk_seq_init(seq); seq->len = len; seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM"); seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); seq->nsymbols = PSSMWIDTH; seq->name = gk_getbasename(filename); fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM"); /* Read the header line */ if (fgets(line, MAXLINELEN-1, fpin) == NULL) errexit("Unexpected end of file: %s\n", filename); gk_strtoupper(line); gk_strtokenize(line, " \t\n", &tokens); for (i=0; i<PSSMWIDTH; i++) header[i] = tokens.list[i][0]; gk_freetokenslist(&tokens); /* Read the rest of the lines */ for (i=0, ii=0; ii<len; ii++) { if (fgets(line, MAXLINELEN-1, fpin) == NULL) errexit("Unexpected end of file: %s\n", filename); gk_strtoupper(line); gk_strtokenize(line, " \t\n", &tokens); seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]]; for (j=0; j<PSSMWIDTH; j++) { seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]); seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]); } gk_freetokenslist(&tokens); i++; } seq->len = i; /* Reset the length if certain characters were skipped */ gk_free((void **)&header, LTERM); gk_fclose(fpin); return seq; }
/*************************************************************************
* Finds the most similar rows for every row of a sparse matrix.
*
* Reads the matrix params->infstem in GK_CSR_FMT_CSR format, compacts its
* columns, normalizes the rows, builds the column index, and then queries
* gk_csr_GetSimilarRows (GK_CSR_COS) once per row with params->nnbrs and
* params->minsim. When params->outfile is set, every hit is written to it
* as "<query row> <hit value> <hit key>". The query loop is timed with
* params->timer_1.
**************************************************************************/
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* rowptr entries are printed with %zd elsewhere in this code base, so
     they are converted explicitly instead of being passed to %d */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows, (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");

  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document */
    nhits = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[i+1]-mat->rowptr[i],
                 mat->rowind+mat->rowptr[i],
                 mat->rowval+mat->rowptr[i],
                 GK_CSR_COS, params->nnbrs, params->minsim, hits,
                 marker, cand);

    /* write the results in the file */
    if (fpout) {
      /* the cast keeps the argument in step with %8d whatever integer
         type the value field is declared with */
      for (j=0; j<nhits; j++)
        fprintf(fpout, "%8d %8d %.3f\n", i, (int)hits[j].val, hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);

  /* cleanup and exit */
  if (fpout)
    gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}
int main(int argc, char *argv[]) { ssize_t i, j, niter; params_t *params; gk_csr_t *mat; FILE *fpout; /* get command-line options */ params = parse_cmdline(argc, argv); /* read the data */ mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1); /* display some basic stats */ print_init_info(params, mat); if (params->ntvs != -1) { /* compute the pr for different randomly generated restart-distribution vectors */ float **prs; prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs"); /* generate the random restart vectors */ for (j=0; j<params->ntvs; j++) { for (i=0; i<mat->nrows; i++) prs[j][i] = RandomInRange(931); gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1); niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]); printf("tvs#: %zd; niters: %zd\n", j, niter); } /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) { for (j=0; j<params->ntvs; j++) fprintf(fpout, "%.4e ", prs[j][i]); fprintf(fpout, "\n"); } gk_fclose(fpout); gk_fFreeMatrix(&prs, params->ntvs, mat->nrows); } else if (params->ppr != -1) { /* compute the personalized pr from the specified vertex */ float *pr; pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr"); pr[params->ppr-1] = 1.0; niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); printf("ppr: %d; niters: %zd\n", params->ppr, niter); /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) fprintf(fpout, "%.4e\n", pr[i]); gk_fclose(fpout); gk_free((void **)&pr, LTERM); } else { /* compute the standard pr */ int jmax; float diff, maxdiff; float *pr; pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr"); niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); printf("pr; niters: %zd\n", niter); /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) { for 
(jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) { maxdiff = diff; jmax = mat->rowind[j]; } } fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1); } gk_fclose(fpout); gk_free((void **)&pr, LTERM); } gk_csr_Free(&mat); /* display some final stats */ print_final_info(params); }
/*************************************************************************
* Reads a graph from a text file.
*
* Lines starting with '%' are comments. The header line holds the number
* of vertices, the number of edges and, optionally, fmt and ncon. The last
* decimal digit of fmt enables edge weights and the digit before it
* enables vertex weights. Each following line describes one vertex: its
* ncon weights (if enabled), then its neighbors (1-based in the file,
* stored 0-based), each followed by its edge weight (if enabled).
*
*   graph     out: initialized with InitGraph and then filled in; nvtxs is
*             set to 0 and nothing else is read when end of file is
*             reached while fetching the header line
*   filename  file to read
*   wgtflag   out: +1 if edge weights are present, +2 if vertex weights are
*
* The number of adjacency entries found must equal twice the edge count of
* the header. Otherwise a diagnostic is printed and the program exits.
* Entries beyond that count are counted for the diagnostic but never
* stored.
**************************************************************************/
void ReadGraph(GraphType *graph, char *filename, idxtype *wgtflag)
{
  idxtype i, k, l, fmt, readew, readvw, ncon, edge, ewgt=0;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt;
  char *line, *oldstr, *newstr;
  size_t len;
  int eof;
  FILE *fpin;

  InitGraph(graph);

  line = gk_cmalloc(MAXLINE+1, "ReadGraph: line");

  fpin = gk_fopen(filename, "r", __func__);

  /* skip the comment lines in front of the header */
  eof = 0;
  do {
    if (fgets(line, MAXLINE, fpin) == NULL) {
      eof = 1;
      break;
    }
  } while (line[0] == '%');

  if (eof || feof(fpin)) {
    graph->nvtxs = 0;
    gk_fclose(fpin);
    gk_free((void **)&line, LTERM);
    return;
  }

  fmt = ncon = 0;
  msscanf(line, "%D %D %D %D", &(graph->nvtxs), &(graph->nedges), &fmt, &ncon);

  readew = (fmt%10 > 0);
  readvw = ((fmt/10)%10 > 0);
  if (fmt >= 100) {
    mprintf("Cannot read this type of file format!");
    exit(0);
  }

  *wgtflag = 0;
  if (readew)
    *wgtflag += 1;
  if (readvw)
    *wgtflag += 2;

  if (ncon > 0 && !readvw) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("*** I detected an error in your input file ***\n\n");
    mprintf("You specified ncon=%D, but the fmt parameter does not specify vertex weights\n", ncon);
    mprintf("Make sure that the fmt parameter is set to either 10 or 11.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  /* every edge shows up in the adjacency lists of both of its ends */
  graph->nedges *=2;
  ncon = graph->ncon = (ncon == 0 ? 1 : ncon);

  if (graph->nvtxs > MAXIDX)
    errexit("\nThe matrix is too big: %d [%d %d]\n", graph->nvtxs, MAXIDX, (int)sizeof(idxtype));

  xadj = graph->xadj = idxsmalloc(graph->nvtxs+1, 0, "ReadGraph: xadj");
  adjncy = graph->adjncy = idxmalloc(graph->nedges, "ReadGraph: adjncy");

  vwgt = graph->vwgt = (readvw ? idxmalloc(ncon*graph->nvtxs, "ReadGraph: vwgt") : NULL);
  adjwgt = graph->adjwgt = (readew ? idxmalloc(graph->nedges, "ReadGraph: adjwgt") : NULL);

  /* Start reading the graph file */
  for (xadj[0]=0, k=0, i=0; i<graph->nvtxs; i++) {
    do {
      /* a missing line is handled as an empty one, so the previous line is
         never parsed a second time; a truncated file is still caught by
         the edge-count check after the loop */
      if (fgets(line, MAXLINE, fpin) == NULL) {
        line[0] = '\0';
        break;
      }
    } while (line[0] == '%');
    oldstr = line;
    newstr = NULL;

    /* fgets stores at most MAXLINE-1 characters, so a line was cut short
       when the buffer is full and does not end with a newline */
    len = strlen(line);
    if (len == (size_t)(MAXLINE-1) && line[len-1] != '\n')
      errexit("\nBuffer for fgets not big enough!\n");

    if (readvw) {
      for (l=0; l<ncon; l++) {
        vwgt[i*ncon+l] = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }
    }

    for (;;) {
      edge = strtoidx(oldstr, &newstr, 10) -1;
      oldstr = newstr;

      if (readew) {
        ewgt = strtoidx(oldstr, &newstr, 10);
        oldstr = newstr;
      }

      if (edge < 0)
        break;

      /* store only what fits in the arrays sized from the header, but keep
         counting so that the report below shows the real number found */
      if (k < graph->nedges) {
        adjncy[k] = edge;
        if (readew)
          adjwgt[k] = ewgt;
      }
      k++;
    }
    xadj[i+1] = k;
  }

  gk_fclose(fpin);

  if (k != graph->nedges) {
    mprintf("------------------------------------------------------------------------------\n");
    mprintf("*** I detected an error in your input file ***\n\n");
    mprintf("In the first line of the file, you specified that the graph contained\n%D edges. However, I only found %D edges in the file.\n", graph->nedges/2, k/2);
    if (2*k == graph->nedges) {
      mprintf("\n *> I detected that you specified twice the number of edges that you have in\n");
      mprintf(" the file. Remember that the number of edges specified in the first line\n");
      mprintf(" counts each edge between vertices v and u only once.\n\n");
    }
    mprintf("Please specify the correct number of edges in the first line of the file.\n");
    mprintf("------------------------------------------------------------------------------\n");
    exit(0);
  }

  gk_free((void **)&line, LTERM);
}
gk_graph_t *gk_graph_Read(char *filename, int format, int isfewgts, int isfvwgts, int isfvsizes) { ssize_t i, k, l; size_t nfields, nvtxs, nedges, fmt, ncon, lnlen; int32_t ival; float fval; int readsizes=0, readwgts=0, readvals=0, numbering=0; char *line=NULL, *head, *tail, fmtstr[256]; FILE *fpin=NULL; gk_graph_t *graph=NULL; if (!gk_fexists(filename)) gk_errexit(SIGERR, "File %s does not exist!\n", filename); if (format == GK_GRAPH_FMT_METIS) { fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin"); do { if (gk_getline(&line, &lnlen, fpin) <= 0) gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); } while (line[0] == '%'); fmt = ncon = 0; nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon); if (nfields < 2) gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n"); nedges *= 2; if (fmt > 111) gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt); sprintf(fmtstr, "%03zu", fmt%1000); readsizes = (fmtstr[0] == '1'); readwgts = (fmtstr[1] == '1'); readvals = (fmtstr[2] == '1'); numbering = 1; ncon = (ncon == 0 ? 
1 : ncon); } else { gk_errexit(SIGERR, "Unrecognized format: %d\n", format); } graph = gk_graph_Create(); graph->nvtxs = nvtxs; graph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj"); graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy"); if (readvals) { if (isfewgts) graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt"); else graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt"); } if (readsizes) { if (isfvsizes) graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes"); else graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes"); } if (readwgts) { if (isfvwgts) graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts"); else graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts"); } /*---------------------------------------------------------------------- * Read the sparse graph file *---------------------------------------------------------------------*/ numbering = (numbering ? - 1 : 0); for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) { do { if (gk_getline(&line, &lnlen, fpin) == -1) gk_errexit(SIGERR, "Pregraphure end of input file: file while reading row %d\n", i); } while (line[0] == '%'); head = line; tail = NULL; /* Read vertex sizes */ if (readsizes) { if (isfvsizes) { #ifdef __MSC__ graph->fvsizes[i] = (float)strtod(head, &tail); #else graph->fvsizes[i] = strtof(head, &tail); #endif if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); if (graph->fvsizes[i] < 0) gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1); } else { graph->ivsizes[i] = strtol(head, &tail, 0); if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); if (graph->ivsizes[i] < 0) gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1); } head = tail; } /* Read vertex weights */ if (readwgts) { for (l=0; l<ncon; l++) { if (isfvwgts) { #ifdef __MSC__ graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail); #else 
graph->fvwgts[i*ncon+l] = strtof(head, &tail); #endif if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights " "for the %d constraints.\n", i+1, ncon); if (graph->fvwgts[i*ncon+l] < 0) gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); } else { graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0); if (tail == head) gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights " "for the %d constraints.\n", i+1, ncon); if (graph->ivwgts[i*ncon+l] < 0) gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); } head = tail; } } /* Read the rest of the row */ while (1) { ival = (int)strtol(head, &tail, 0); if (tail == head) break; head = tail; if ((graph->adjncy[k] = ival + numbering) < 0) gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i); if (readvals) { if (isfewgts) { #ifdef __MSC__ fval = (float)strtod(head, &tail); #else fval = strtof(head, &tail); #endif if (tail == head) gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k); graph->fadjwgt[k] = fval; } else { ival = strtol(head, &tail, 0); if (tail == head) gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k); graph->iadjwgt[k] = ival; } head = tail; } k++; } graph->xadj[i+1] = k; } if (k != nedges) gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in " "the input file. nedges=%zd, Actualnedges=%zd.\n", nedges, k); gk_fclose(fpin); gk_free((void **)&line, LTERM); return graph; }
pdbf *gk_readpdbfile(char *fname) { /* {{{ */ int i=0, res=0; char linetype[6]; int aserial; char aname[5] = " \0"; char altLoc = ' '; char rname[4] = " \0"; char chainid = ' '; char oldchainid = ' '; int rserial; int oldRserial = -37; char icode = ' '; char element = ' '; double x; double y; double z; double avgx; double avgy; double avgz; double opcy; double tmpt; char line[MAXLINELEN]; int corruption=0; int nresatoms; int atoms=0, residues=0, cas=0, bbs=0, firstres=1; pdbf *toFill = gk_malloc(sizeof(pdbf),"fillme"); FILE *FPIN; FPIN = gk_fopen(fname,"r",fname); while(fgets(line, 256, FPIN)) { sscanf(line,"%s ",linetype); /* It seems the only reliable parts are through temperature, so we only use these parts */ /* if(strstr(linetype, "ATOM") != NULL || strstr(linetype, "HETATM") != NULL) { */ if(strstr(linetype, "ATOM") != NULL) { sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n", linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element); sscanf(linetype, " %s ",linetype); sscanf(aname, " %s ",aname); sscanf(rname, " %s ",rname); if(altLoc != ' ') { corruption = corruption|CRP_ALTLOCS; } if(firstres == 1) { oldRserial = rserial; oldchainid = chainid; residues++; firstres = 0; } if(oldRserial != rserial) { residues++; oldRserial = rserial; } if(oldchainid != chainid) { corruption = corruption|CRP_MULTICHAIN; } oldchainid = chainid; atoms++; if(strcmp(aname,"CA") == 0) { cas++; } if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 || strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) { bbs++; } } else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) { break; } } fclose(FPIN); /* printf("File has coordinates for %d atoms in %d residues\n",atoms,residues); */ toFill->natoms = atoms; toFill->ncas = cas; toFill->nbbs = bbs; toFill->nresidues = residues; toFill->resSeq = (char *) gk_malloc (residues*sizeof(char),"residue seq"); toFill->threeresSeq = 
(char **)gk_malloc (residues*sizeof(char *),"residue seq"); toFill->atoms = (atom *) gk_malloc (atoms*sizeof(atom), "atoms"); toFill->bbs = (atom **)gk_malloc ( bbs*sizeof(atom *),"bbs"); toFill->cas = (atom **)gk_malloc ( cas*sizeof(atom *),"cas"); toFill->cm = (center_of_mass *)gk_malloc(residues*sizeof(center_of_mass),"center of mass"); res=0; firstres=1; cas=0; bbs=0; i=0; avgx = 0.0; avgy = 0.0; avgz = 0.0; nresatoms = 0; FPIN = gk_fopen(fname,"r",fname); while(fgets(line, 256, FPIN)) { sscanf(line,"%s ",linetype); /* It seems the only reliable parts are through temperature, so we only use these parts */ /* if(strstr(linetype, "ATOM") != NULL || strstr(linetype, "HETATM") != NULL) { */ if(strstr(linetype, "ATOM") != NULL ) { /* to ensure our memory doesn't get corrupted by the biologists, we only read this far */ sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n", linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element); sscanf(aname, "%s",aname); sscanf(rname, "%s",rname); if(firstres == 1) { toFill->resSeq[res] = gk_threetoone(rname); toFill->threeresSeq[res] = gk_strdup(rname); oldRserial = rserial; res++; firstres = 0; } if(oldRserial != rserial) { /* we're changing residues. 
store the center of mass from the last one & reset */ toFill->cm[res-1].x = avgx/nresatoms; toFill->cm[res-1].y = avgy/nresatoms; toFill->cm[res-1].z = avgz/nresatoms; avgx = 0.0; avgy = 0.0; avgz = 0.0; nresatoms = 0; toFill->cm[res-1].name = toFill->resSeq[res-1]; toFill->threeresSeq[res] = gk_strdup(rname); toFill->resSeq[res] = gk_threetoone(rname); res++; oldRserial = rserial; } avgx += x; avgy += y; avgz += z; nresatoms++; toFill->atoms[i].x = x; toFill->atoms[i].y = y; toFill->atoms[i].z = z; toFill->atoms[i].opcy = opcy; toFill->atoms[i].tmpt = tmpt; toFill->atoms[i].element = element; toFill->atoms[i].serial = aserial; toFill->atoms[i].chainid = chainid; toFill->atoms[i].altLoc = altLoc; toFill->atoms[i].rserial = rserial; toFill->atoms[i].icode = icode; toFill->atoms[i].name = gk_strdup(aname); toFill->atoms[i].resname = gk_strdup(rname); /* Set up pointers for the backbone and c-alpha shortcuts */ if(strcmp(aname,"CA") == 0) { toFill->cas[cas] = &(toFill->atoms[i]); cas++; } if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 || strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) { toFill->bbs[bbs] = &(toFill->atoms[i]); bbs++; } i++; } else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) { break; } } /* get that last average */ toFill->cm[res-1].x = avgx/nresatoms; toFill->cm[res-1].y = avgy/nresatoms; toFill->cm[res-1].z = avgz/nresatoms; /* Begin test code */ if(cas != residues) { printf("Number of residues and CA coordinates differs by %d (!)\n",residues-cas); if(cas < residues) { corruption = corruption|CRP_MISSINGCA; } else if(cas > residues) { corruption = corruption|CRP_MULTICA; } } if(bbs < residues*4) { corruption = corruption|CRP_MISSINGBB; } else if(bbs > residues*4) { corruption = corruption|CRP_MULTIBB; } fclose(FPIN); toFill->corruption = corruption; /* if(corruption == 0) printf("File was clean!\n"); */ return(toFill); } /* }}} */