//---------------------------------------------------------------------------
// NAME: ReadGraph
//
// INPUTS: (char *filename) - path of the graph file to read
//         (LabelList *labelList) - label list handed on to ReadVertex and
//                                  ReadEdge
//         (BOOLEAN directed) - directedness used for plain 'e' edges
//
// RETURN: (Graph *) - newly allocated graph filled from the file
//
// PURPOSE: Allocate an empty graph and populate it by dispatching on the
// tokens of the given file: "v" reads a vertex, "e" reads an edge whose
// directedness is the 'directed' argument, "u" reads an undirected edge
// and "d" reads a directed edge.  Parsing stops when ReadToken returns 0.
// If the file cannot be opened, or a token is not one of the four above,
// a message is written to stderr and the program exits with status 1.
//---------------------------------------------------------------------------

Graph *ReadGraph(char *filename, LabelList *labelList, BOOLEAN directed)
{
   char token[TOKEN_LEN];
   ULONG lineNo = 1;          // line number counter for the graph file
   ULONG vertexCapacity = 0;  // size of currently-allocated vertex array
   ULONG edgeCapacity = 0;    // size of currently-allocated edge array
   ULONG noOffset = 0;        // dummy argument to ReadVertex and ReadEdge
   BOOLEAN edgeDirected = directed;
   Graph *graph;
   FILE *graphFile;

   // the graph is allocated before the file is opened
   graph = AllocateGraph(0, 0);

   graphFile = fopen(filename, "r");
   if (graphFile == NULL)
   {
      fprintf(stderr, "Unable to open graph file %s.\n", filename);
      exit(1);
   }

   // one token per iteration until ReadToken reports there are no more
   for (;;)
   {
      if (ReadToken(token, graphFile, &lineNo) == 0)
         break;

      if (strcmp(token, "v") == 0)
      {
         ReadVertex(graph, graphFile, labelList, &vertexCapacity, &lineNo,
                    noOffset);
         continue;
      }

      // everything else must be one of the three edge tokens
      if (strcmp(token, "e") == 0)
         edgeDirected = directed;
      else if (strcmp(token, "u") == 0)
         edgeDirected = FALSE;
      else if (strcmp(token, "d") == 0)
         edgeDirected = TRUE;
      else
      {
         fclose(graphFile);
         FreeGraph(graph);
         fprintf(stderr, "Unknown token %s in line %lu of graph file %s.\n",
                 token, lineNo, filename);
         exit(1);
      }

      ReadEdge(graph, graphFile, labelList, &edgeCapacity, &lineNo,
               edgeDirected, noOffset);
   }

   fclose(graphFile);

   //***** trim vertex, edge and label lists

   return graph;
}
//---------------------------------------------------------------------------
// NAME: CopyGraph
//
// INPUTS: (Graph *g) - graph to be copied
//
// RETURN: (Graph *) - newly allocated copy of g
//
// PURPOSE: Allocate a graph with the same number of vertices and edges as
// g and copy into it each vertex's label, map, used flag and edge-index
// array, and each edge's vertex1, vertex2, label, directed flag and used
// flag.  Every vertex of the copy gets its own edge-index array (NULL
// when the vertex has no edges), so the copy shares no adjacency storage
// with g.
//---------------------------------------------------------------------------

Graph *CopyGraph(Graph *g)
{
   ULONG vertexCount = g->numVertices;
   ULONG edgeCount = g->numEdges;
   Graph *duplicate = AllocateGraph(vertexCount, edgeCount);
   ULONG i;
   ULONG k;

   // vertices, each with a private copy of its edge-index array
   for (i = 0; i < vertexCount; i++)
   {
      ULONG degree = g->vertices[i].numEdges;
      ULONG *incident = NULL;

      duplicate->vertices[i].label = g->vertices[i].label;
      duplicate->vertices[i].map = g->vertices[i].map;
      duplicate->vertices[i].used = g->vertices[i].used;
      duplicate->vertices[i].numEdges = degree;

      if (degree > 0)
      {
         incident = (ULONG *) malloc(degree * sizeof(ULONG));
         if (incident == NULL)
            OutOfMemoryError("CopyGraph:edges");
         k = 0;
         while (k < degree)
         {
            incident[k] = g->vertices[i].edges[k];
            k++;
         }
      }
      duplicate->vertices[i].edges = incident;
   }

   // edges, field by field
   i = 0;
   while (i < edgeCount)
   {
      duplicate->edges[i].vertex1 = g->edges[i].vertex1;
      duplicate->edges[i].vertex2 = g->edges[i].vertex2;
      duplicate->edges[i].label = g->edges[i].label;
      duplicate->edges[i].directed = g->edges[i].directed;
      duplicate->edges[i].used = g->edges[i].used;
      i++;
   }

   return duplicate;
}
SubList *GetInitialSubs(Parameters *parameters) { SubList *initialSubs; ULONG i, j; ULONG vertexLabelIndex; ULONG numInitialSubs; Graph *g; Substructure *sub; Instance *instance; // parameters used Graph *posGraph = parameters->posGraph; Graph *negGraph = parameters->negGraph; LabelList *labelList = parameters->labelList; ULONG outputLevel = parameters->outputLevel; ULONG currentIncrement = 0; ULONG startVertexIndex; if (parameters->incremental) { currentIncrement = GetCurrentIncrementNum(parameters); // Index for first vertex in increment // Begin with the index for the first vertex in this increment and // move up through all remaining vertices. Relies on the fact that // each new increment is placed on the end of the vertex array and that // we are only interested in the current (last) increment startVertexIndex = GetStartVertexIndex(currentIncrement, parameters, POS); if (parameters->outputLevel > 2) printf("Start vertex index = %lu\n", startVertexIndex); } else startVertexIndex = 0; // reset labels' used flag for (i = 0; i < labelList->numLabels; i++) labelList->labels[i].used = FALSE; numInitialSubs = 0; initialSubs = AllocateSubList(); for (i = startVertexIndex; i < posGraph->numVertices; i++) { posGraph->vertices[i].TimesAddedToInstanceList = 0; vertexLabelIndex = posGraph->vertices[i].label; if (labelList->labels[vertexLabelIndex].used == FALSE) { labelList->labels[vertexLabelIndex].used = TRUE; // create one-vertex substructure definition g = AllocateGraph(1, 0); g->vertices[0].label = vertexLabelIndex; g->vertices[0].numEdges = 0; g->vertices[0].edges = NULL; g->vertices[0].TimesAddedToInstanceList = 0; // allocate substructure sub = AllocateSub(); sub->definition = g; sub->instances = AllocateInstanceList(); // collect instances in positive graph j = posGraph->numVertices; do { j--; if (posGraph->vertices[j].label == vertexLabelIndex) { // ***** do inexact label matches here? 
(instance->minMatchCost // ***** too) instance = AllocateInstance(1, 0); instance->vertices[0] = j; instance->mapping[0].v1 = 0; instance->mapping[0].v2 = j; instance->minMatchCost = 0.0; InstanceListInsert(instance, sub->instances, FALSE); sub->numInstances++; } } while (j > i); // only keep substructure if more than one positive instance if (sub->numInstances > 1) { if (negGraph != NULL) { // collect instances in negative graph sub->negInstances = AllocateInstanceList(); j = negGraph->numVertices; if (parameters->incremental) startVertexIndex = GetStartVertexIndex(currentIncrement, parameters, POS); else startVertexIndex = 0; do { j--; if (negGraph->vertices[j].label == vertexLabelIndex) { // ***** do inexact label matches here? // ***** (instance->minMatchCost too) instance = AllocateInstance(1, 0); instance->vertices[0] = j; instance->mapping[0].v1 = 0; instance->mapping[0].v2 = j; instance->minMatchCost = 0.0; InstanceListInsert(instance, sub->negInstances, FALSE); sub->numNegInstances++; } // We need to try all negative graph labels } while (j > startVertexIndex); } EvaluateSub(sub, parameters); // add to initialSubs SubListInsert(sub, initialSubs, 0, FALSE, labelList); numInitialSubs++; } else { // prune single-instance substructure FreeSub(sub); } } } if (outputLevel > 1) printf("%lu initial substructures\n", numInitialSubs); return initialSubs; }
void Test(char *subsFileName, char *graphFileName, Parameters *parameters, ULONG *TPp, ULONG *TNp, ULONG *FPp, ULONG *FNp) { FILE *graphFile; LabelList *labelList; BOOLEAN directed; Graph **subGraphs; ULONG numSubGraphs; Graph *graph; BOOLEAN positive1; BOOLEAN positive2; ULONG vertexOffset = 0; ULONG lineNo = 1; char token[TOKEN_LEN]; ULONG FP = 0; ULONG FN = 0; ULONG TP = 0; ULONG TN = 0; ULONG i; labelList = parameters->labelList; directed = parameters->directed; // read substructures subGraphs = ReadSubGraphsFromFile(subsFileName, SUB_TOKEN, &numSubGraphs, parameters); fprintf(stdout, "Read %lu substructures from file %s.\n", numSubGraphs, subsFileName); // open example graphs file and compute stats graphFile = fopen(graphFileName, "r"); if (graphFile == NULL) { fprintf(stderr, "Unable to open graph file %s.\n", graphFileName); exit(1); } graph = NULL; positive1 = TRUE; while (ReadToken(token, graphFile, &lineNo) != 0) { if (strcmp(token, POS_EG_TOKEN) == 0) { // reading positive eg if (graph != NULL) { // test last graph positive2 = PositiveExample(graph, subGraphs, numSubGraphs, parameters); // increment appropriate counter if (positive1 && positive2) TP++; if (positive1 && (! positive2)) FN++; if ((! positive1) && positive2) FP++; if ((! positive1) && (! positive2)) TN++; FreeGraph(graph); } graph = AllocateGraph(0,0); positive1 = TRUE; } else if (strcmp(token, NEG_EG_TOKEN) == 0) { // reading negative eg if (graph != NULL) { // test last graph positive2 = PositiveExample(graph, subGraphs, numSubGraphs, parameters); // increment appropriate counter if (positive1 && positive2) TP++; if (positive1 && (! positive2)) FN++; if ((! positive1) && positive2) FP++; if ((! positive1) && (! 
positive2)) TN++; FreeGraph(graph); } graph = AllocateGraph(0,0); positive1 = FALSE; } else if (strcmp(token, "v") == 0) { // read vertex if (positive1 && (graph == NULL)) { // first graph starts without positive token, so assumed positive graph = AllocateGraph(0,0); } ReadVertex(graph, graphFile, labelList, &lineNo, vertexOffset); } else if (strcmp(token, "e") == 0) // read 'e' edge ReadEdge(graph, graphFile, labelList, &lineNo, directed, vertexOffset); else if (strcmp(token, "u") == 0) // read undirected edge ReadEdge(graph, graphFile, labelList, &lineNo, FALSE, vertexOffset); else if (strcmp(token, "d") == 0) // read directed edge ReadEdge(graph, graphFile, labelList, &lineNo, TRUE, vertexOffset); else { fclose(graphFile); fprintf(stderr, "Unknown token %s in line %lu of input file %s.\n", token, lineNo, graphFileName); exit(1); } } // test last graph if (graph != NULL) { positive2 = PositiveExample(graph, subGraphs, numSubGraphs, parameters); // increment appropriate counter if (positive1 && positive2) TP++; if (positive1 && (! positive2)) FN++; if ((! positive1) && positive2) FP++; if ((! positive1) && (! positive2)) TN++; FreeGraph(graph); } fclose(graphFile); // free substructure graphs for (i = 0; i < numSubGraphs; i++) FreeGraph(subGraphs[i]); free(subGraphs); *TPp = TP; *TNp = TN; *FPp = FP; *FNp = FN; }
//---------------------------------------------------------------------------
// NAME: ReadInputFile
//
// INPUTS: (Parameters *parameters) - supplies inputFileName, labelList and
//                                    directed; receives the results
//
// RETURN: (void)
//
// PURPOSE: Read the examples in parameters->inputFileName into one
// positive and one negative graph.  A POS_EG_TOKEN or NEG_EG_TOKEN starts
// a new example in the corresponding graph: the example count is
// incremented, the graph's current vertex count becomes the vertex offset
// for what follows, and that offset is recorded through AddVertexIndex.
// A file that begins directly with a vertex is taken to start with a
// positive example.  Vertices and edges are then added to the graph of
// the example being read.
//
// On return the graphs (NULL if no example of that kind was read), the
// label list, the example counts and the example vertex-index arrays are
// stored in parameters.
//
// If the file cannot be opened, an unknown token is found, or an edge
// appears before any example or vertex, a message is written to stderr
// and the program exits with status 1.
//---------------------------------------------------------------------------

void ReadInputFile(Parameters *parameters)
{
   FILE *inputFile = NULL;
   Graph *graph = NULL;              // graph currently being added to
   Graph *posGraph = NULL;
   Graph *negGraph = NULL;
   ULONG posGraphVertexListSize = 0;
   ULONG posGraphEdgeListSize = 0;
   ULONG negGraphVertexListSize = 0;
   ULONG negGraphEdgeListSize = 0;
   ULONG *vertexListSizePtr = NULL;  // list sizes belonging to 'graph'
   ULONG *edgeListSizePtr = NULL;
   LabelList *labelList = NULL;
   ULONG numPosEgs = 0;
   ULONG numNegEgs = 0;
   ULONG *posEgsVertexIndices = NULL;
   ULONG *negEgsVertexIndices = NULL;
   BOOLEAN readingPositive = TRUE;
   ULONG vertexOffset = 0;
   BOOLEAN directed = TRUE;
   BOOLEAN edgeDirected;
   ULONG lineNo = 1;
   char token[TOKEN_LEN];

   labelList = parameters->labelList;
   directed = parameters->directed;

   // Open input file
   inputFile = fopen(parameters->inputFileName,"r");
   if (inputFile == NULL)
   {
      fprintf(stderr, "Unable to open input file %s.\n",
              parameters->inputFileName);
      exit(1);
   }

   // Parse input file
   while (ReadToken(token, inputFile, &lineNo) != 0)
   {
      if (strcmp(token, POS_EG_TOKEN) == 0)
      {
         // reading positive eg
         if (posGraph == NULL)
            posGraph = AllocateGraph(0,0);
         numPosEgs++;
         vertexOffset = posGraph->numVertices;
         posEgsVertexIndices = AddVertexIndex(posEgsVertexIndices,
                                              numPosEgs, vertexOffset);
         graph = posGraph;
         vertexListSizePtr = & posGraphVertexListSize;
         edgeListSizePtr = & posGraphEdgeListSize;
         readingPositive = TRUE;
      }
      else if (strcmp(token, NEG_EG_TOKEN) == 0)
      {
         // reading negative eg
         if (negGraph == NULL)
            negGraph = AllocateGraph(0,0);
         numNegEgs++;
         vertexOffset = negGraph->numVertices;
         negEgsVertexIndices = AddVertexIndex(negEgsVertexIndices,
                                              numNegEgs, vertexOffset);
         graph = negGraph;
         vertexListSizePtr = & negGraphVertexListSize;
         edgeListSizePtr = & negGraphEdgeListSize;
         readingPositive = FALSE;
      }
      else if (strcmp(token, "v") == 0)
      {
         // read vertex
         if (readingPositive && (posGraph == NULL))
         {
            // first graph starts without positive token, so assumed positive
            posGraph = AllocateGraph(0,0);
            numPosEgs++;
            vertexOffset = 0;
            posEgsVertexIndices = AddVertexIndex(posEgsVertexIndices,
                                                 numPosEgs, vertexOffset);
            graph = posGraph;
            vertexListSizePtr = & posGraphVertexListSize;
            edgeListSizePtr = & posGraphEdgeListSize;
         }
         ReadVertex(graph, inputFile, labelList, vertexListSizePtr, &lineNo,
                    vertexOffset);
      }
      else
      {
         if (strcmp(token, "e") == 0)        // 'e' edge: use default
            edgeDirected = directed;
         else if (strcmp(token, "u") == 0)   // undirected edge
            edgeDirected = FALSE;
         else if (strcmp(token, "d") == 0)   // directed edge
            edgeDirected = TRUE;
         else
         {
            fclose(inputFile);
            fprintf(stderr, "Unknown token %s in line %lu of input file %s.\n",
                    token, lineNo, parameters->inputFileName);
            exit(1);
         }

         // until an example token or a vertex has been seen there is no
         // current graph (and no edge list size) to hand to ReadEdge
         if (graph == NULL)
         {
            fclose(inputFile);
            fprintf(stderr,
                    "Edge before any vertex in line %lu of input file %s.\n",
                    lineNo, parameters->inputFileName);
            exit(1);
         }
         ReadEdge(graph, inputFile, labelList, edgeListSizePtr, &lineNo,
                  edgeDirected, vertexOffset);
      }
   }
   fclose(inputFile);

   //***** trim vertex, edge and label lists

   parameters->posGraph = posGraph;
   parameters->negGraph = negGraph;
   parameters->labelList = labelList;
   parameters->numPosEgs = numPosEgs;
   parameters->numNegEgs = numNegEgs;
   parameters->posEgsVertexIndices = posEgsVertexIndices;
   parameters->negEgsVertexIndices = negEgsVertexIndices;
}
//---------------------------------------------------------------------------
// NAME: InstanceToGraph
//
// INPUTS: (Instance *instance) - instance to convert
//         (Graph *graph) - graph the instance's vertex and edge indices
//                          refer to
//
// RETURN: (Graph *) - newly allocated graph with one vertex per instance
//                     vertex and one edge per instance edge
//
// PURPOSE: Build a stand-alone graph from an instance.  Vertex i of the
// new graph takes the label of graph vertex instance->vertices[i]; edge i
// takes the label and directedness of graph edge instance->edges[i], with
// its endpoints renumbered to positions in instance->vertices.  All 'used'
// flags in the new graph are FALSE, and each new vertex's edge-index
// array lists the new edges touching it (a self-loop is listed once).
//
// Every endpoint of every instance edge is expected to appear in
// instance->vertices; the endpoint search is not bounded by the number of
// instance vertices.
//---------------------------------------------------------------------------

Graph *InstanceToGraph(Instance *instance, Graph *graph)
{
   Graph *result;
   Vertex *endpoint;
   Edge *oldEdge;
   ULONG n;          // index into the instance's vertices / edges
   ULONG pos;        // search position in instance->vertices
   ULONG src = 0;    // new index of the edge's vertex1
   ULONG dst = 0;    // new index of the edge's vertex2
   BOOLEAN haveSrc;
   BOOLEAN haveDst;

   result = AllocateGraph(instance->numVertices, instance->numEdges);

   // vertices: copy the label, start with no incident edges
   n = 0;
   while (n < instance->numVertices)
   {
      endpoint = & graph->vertices[instance->vertices[n]];
      result->vertices[n].label = endpoint->label;
      result->vertices[n].numEdges = 0;
      result->vertices[n].edges = NULL;
      result->vertices[n].used = FALSE;
      n++;
   }

   // edges
   n = 0;
   while (n < instance->numEdges)
   {
      oldEdge = & graph->edges[instance->edges[n]];

      // translate both endpoints into positions within the instance
      haveSrc = FALSE;
      haveDst = FALSE;
      for (pos = 0; ! (haveSrc && haveDst); pos++)
      {
         if (instance->vertices[pos] == oldEdge->vertex1)
         {
            src = pos;
            haveSrc = TRUE;
         }
         if (instance->vertices[pos] == oldEdge->vertex2)
         {
            dst = pos;
            haveDst = TRUE;
         }
      }

      // set new edge information
      result->edges[n].vertex1 = src;
      result->edges[n].vertex2 = dst;
      result->edges[n].label = oldEdge->label;
      result->edges[n].directed = oldEdge->directed;
      result->edges[n].used = FALSE;

      // record the edge on its first endpoint
      endpoint = & result->vertices[src];
      endpoint->numEdges++;
      endpoint->edges = (ULONG *) realloc(endpoint->edges,
                                          sizeof(ULONG) * endpoint->numEdges);
      if (endpoint->edges == NULL)
         OutOfMemoryError("InstanceToGraph:vertex1->edges");
      endpoint->edges[endpoint->numEdges - 1] = n;

      // and on the second, unless the edge is a self-loop
      if (src != dst)
      {
         endpoint = & result->vertices[dst];
         endpoint->numEdges++;
         endpoint->edges = (ULONG *) realloc(endpoint->edges,
                                             sizeof(ULONG) * endpoint->numEdges);
         if (endpoint->edges == NULL)
            OutOfMemoryError("InstanceToGraph:vertex2->edges");
         endpoint->edges[endpoint->numEdges - 1] = n;
      }

      n++;
   }

   return result;
}