Ejemplo n.º 1
0
// Prints "New best <sub>" when the given substructure outranks the current
// contents of subList: either the list has no head, or sub->value is
// strictly greater than the value of the substructure at the list head.
// Only the head entry is compared.
//
// While printing, parameters->outputLevel is temporarily forced to 1 (which
// turns off instance printing in PrintSub) and is restored to its previous
// value before returning.
void PrintNewBestSub(Substructure *sub, SubList *subList,
                     Parameters *parameters)
{
   ULONG savedLevel;

   // Not a new best: the list is non-empty and sub does not beat its head.
   if ((subList->head != NULL) &&
       !(sub->value > subList->head->sub->value))
      return;

   savedLevel = parameters->outputLevel;
   parameters->outputLevel = 1; // suppress instance printing
   printf("\nNew best ");
   PrintSub(sub, parameters);
   printf("\n");
   parameters->outputLevel = savedLevel;
}
Ejemplo n.º 2
0
// Recursively writes the subtree rooted at node n to str, one node per line.
//
//   str   - destination stream.
//   flags - bit mask selecting the optional columns:
//             1 -> PrintSub(str, node.pos, node.pos + len) followed by ' '
//             2 -> the node length (0 for ROOT) followed by ' '
//             8 -> node.count followed by ' '
//           (bit 4, a depth column, is disabled.)
//   n     - node to print; NIL means "start at ROOT".
//   prev  - the node this call was reached from; it is only forwarded and
//           reset here, never read by the active code.
//   ind   - indentation handed to PrintInd for this node; children are
//           printed with ind increased by this node's length.
//
// Every line ends with " [n]->[node.suf]" and std::endl (so the stream is
// flushed per node). Children are visited for symbol indices
// 1, 2, ..., NSymb-1 and finally 0.
//
// NOTE: the original also carried a commented-out, leaf-specific branch
// (based on IsInternal(n) / GetLeafPos(prev, n)); all nodes currently go
// through the single path below.
void SuffixTree<Symb,NSymb>::Print(std::ostream& str, uint flags, PNode n, PNode prev, int ind)
{
	// A NIL start node selects the root, which has no predecessor.
	if(n == NIL) {
		n = ROOT;
		prev = NIL;
	}

	Node& node = GetNode(n);

	// ROOT is always printed with length 0.
	int nodeLen = 0;
	if(n != ROOT)
		nodeLen = node.len;

	PrintInd(str, ind);

	if((flags & 1) != 0) {
		PrintSub(str, node.pos, node.pos + nodeLen);
		str << ' ';
	}
	if((flags & 2) != 0)
		str << nodeLen << ' ';
	if((flags & 8) != 0)
		str << node.count << ' ';

	str << " [" << n;
	str << "]->[" << node.suf << "]";
	str << std::endl;

	// Descend with the indentation widened by this node's length.
	const int childInd = ind + nodeLen;
	for(int step = 0; step < NSymb; ++step) {
		// (step + 1) % NSymb yields 1, 2, ..., NSymb-1, 0 in that order.
		PNode child = GetChild(n, (step + 1) % NSymb);
		if(child == NIL)
			continue;
		Print(str, flags, child, n, childInd);
	}
}
Ejemplo n.º 3
0
// Prints every substructure in subList, in list order, each on its own
// line and prefixed with its 1-based position as "(N) ". Printing of the
// substructure itself is delegated to PrintSub. A NULL subList prints
// nothing.
void PrintSubList(SubList *subList, Parameters *parameters)
{
   SubListNode *node;
   ULONG position;

   if (subList == NULL)
      return;

   position = 1;
   for (node = subList->head; node != NULL; node = node->next)
   {
      printf("(%lu) ", position);
      position++;
      PrintSub(node->sub, parameters);
      printf("\n");
   }
}
Ejemplo n.º 4
0
// Discovers substructures by repeatedly extending a list of parent
// substructures, starting from the list returned by GetInitialSubs().
//
// Each pass of the outer loop walks the current parent list. A parent is
// extended (via ExtendSub) only while the expansion budget `limit` is
// positive and the parent either has more than one instance (and the
// evaluation method is not EVAL_SETCOVER) or has at least one negative
// instance. Each extension with at most maxVertices vertices is evaluated;
// it is then discarded if pruning is on and its value is below the
// parent's, otherwise it is inserted into the child list (SubListInsert is
// given beamWidth and valueBased). Larger extensions are freed.
//
// Every parent with at least minVertices vertices that is not rejected by
// SinglePreviousSub() is inserted into the discovered list (SubListInsert
// is given numBestSubs); if recursion is enabled, the substructure returned
// by RecursifySub() is inserted as well when non-NULL.
//
// When the outer loop ends, whatever remains in the parent list is offered
// to the discovered list under the same minVertices / SinglePreviousSub
// rules.
//
// Ownership: each substructure is detached from its list node (node->sub
// is set to NULL) before being processed, so FreeSubList() will not release
// it. Every path must therefore either hand it to SubListInsert() or call
// FreeSub() on it.
//
// parameters - run configuration; outputLevel is temporarily set to 1
//              around some PrintSub calls and restored afterwards.
// Returns the list of discovered substructures (allocated here with
// AllocateSubList; presumably owned by the caller - confirm).
SubList *DiscoverSubs(Parameters *parameters)
{
   SubList *parentSubList;
   SubList *childSubList;
   SubList *extendedSubList;
   SubList *discoveredSubList;
   SubListNode *parentSubListNode;
   SubListNode *extendedSubListNode;
   Substructure *parentSub;
   Substructure *extendedSub;
   Substructure *recursiveSub = NULL;

   // parameters used
   ULONG limit          = parameters->limit;
   ULONG numBestSubs    = parameters->numBestSubs;
   ULONG beamWidth      = parameters->beamWidth;
   BOOLEAN valueBased   = parameters->valueBased;
   LabelList *labelList = parameters->labelList;
   BOOLEAN prune        = parameters->prune;
   ULONG maxVertices    = parameters->maxVertices;
   ULONG minVertices    = parameters->minVertices;
   ULONG outputLevel    = parameters->outputLevel;
   BOOLEAN recursion    = parameters->recursion;
   ULONG evalMethod     = parameters->evalMethod;

   // get initial one-vertex substructures
   parentSubList = GetInitialSubs(parameters);

   discoveredSubList = AllocateSubList();
   while ((limit > 0) && (parentSubList->head != NULL)) 
   {
      parentSubListNode = parentSubList->head;
      childSubList = AllocateSubList();
      // extend each substructure in parent list
      while (parentSubListNode != NULL)
      {
         // detach the sub from its node; this function now owns it
         parentSub = parentSubListNode->sub;
         parentSubListNode->sub = NULL;
         if (outputLevel > 4) 
         {
            parameters->outputLevel = 1; // turn off instance printing
            printf("\nConsidering ");
            PrintSub(parentSub, parameters);
            printf("\n");
            parameters->outputLevel = outputLevel;
         }
         if ((((parentSub->numInstances > 1) && (evalMethod != EVAL_SETCOVER)) ||
              (parentSub->numNegInstances > 0)) &&
             (limit > 0))
         {
            limit--;
            if (outputLevel > 3)
               printf("%lu substructures left to be considered\n", limit);
            fflush(stdout);
            extendedSubList = ExtendSub(parentSub, parameters);
            extendedSubListNode = extendedSubList->head;
            while (extendedSubListNode != NULL) 
            {
               // detach the extension from its node as well
               extendedSub = extendedSubListNode->sub;
               extendedSubListNode->sub = NULL;
               if (extendedSub->definition->numVertices <= maxVertices) 
               {
                  // evaluate each extension and add to child list
                  EvaluateSub(extendedSub, parameters);
                  if (prune && (extendedSub->value < parentSub->value)) 
                  {
                     FreeSub(extendedSub);
                  } 
                  else 
                  {
                     SubListInsert(extendedSub, childSubList, beamWidth, 
                                   valueBased, labelList);
                  }
               } 
               else 
               {
                  // extension exceeds maxVertices
                  FreeSub(extendedSub);
               }
               extendedSubListNode = extendedSubListNode->next;
            }
            FreeSubList(extendedSubList);
         }
         // add parent substructure to final discovered list
         if ((parentSub->definition->numVertices >= minVertices) &&
             (! SinglePreviousSub(parentSub, parameters)))
         {
            // consider recursive substructure, if requested
            if (recursion)
               recursiveSub = RecursifySub(parentSub, parameters);
            if (outputLevel > 3)
               PrintNewBestSub(parentSub, discoveredSubList, parameters);
            SubListInsert(parentSub, discoveredSubList, numBestSubs, FALSE,
                          labelList);
            if (recursion && (recursiveSub != NULL)) 
            {
               if (outputLevel > 4) 
               {
                  parameters->outputLevel = 1; // turn off instance printing
                  printf("\nConsidering Recursive ");
                  PrintSub(recursiveSub, parameters);
                  printf ("\n");
                  parameters->outputLevel = outputLevel;
               }
               if (outputLevel > 3)
                  PrintNewBestSub(recursiveSub, discoveredSubList, parameters);
               SubListInsert(recursiveSub, discoveredSubList, numBestSubs,
                             FALSE, labelList);
            }
         } 
         else 
         {
            // Parent is too small or was rejected by SinglePreviousSub.
            // It has been detached from its list node, so it must be freed
            // here; previously the SinglePreviousSub rejection path leaked
            // it. (Assumes SinglePreviousSub is a pure predicate that does
            // not take ownership - confirm.)
            FreeSub (parentSub);
         }
         parentSubListNode = parentSubListNode->next;
      }
      FreeSubList(parentSubList);
      parentSubList = childSubList;
   }

   if ((limit > 0) && (outputLevel > 2))
      printf ("\nSubstructure queue empty.\n");

   // try to insert any remaining subs in parent list on to discovered list
   parentSubListNode = parentSubList->head;
   while (parentSubListNode != NULL) 
   {
      parentSub = parentSubListNode->sub;
      parentSubListNode->sub = NULL;
      if ((parentSub->definition->numVertices >= minVertices) &&
          (! SinglePreviousSub(parentSub, parameters)))
      {
         if (outputLevel > 3)
            PrintNewBestSub(parentSub, discoveredSubList, parameters);
         SubListInsert(parentSub, discoveredSubList, numBestSubs, FALSE,
                       labelList);
      } 
      else 
      {
         // too small or rejected: release it, since the node no longer
         // references it and FreeSubList below would not free it
         FreeSub(parentSub);
      }
      parentSubListNode = parentSubListNode->next;
   }
   FreeSubList(parentSubList);
   return discoveredSubList;
}
Ejemplo n.º 5
0
// Program entry point: parses the command line into a Parameters structure,
// optionally compresses the positive graphs with predefined substructures,
// and then runs up to parameters->iterations discovery iterations.
//
// Each iteration:
//   - prints a summary of the positive graph(s),
//   - calls DiscoverSubs() to obtain the list of best substructures,
//   - runs whichever of GBAD_MDL / GBAD_MPS / GBAD_P is enabled,
//   - prints the result and, if requested, appends the best substructure's
//     definition to the output file,
//   - if another iteration follows, removes covered examples
//     (EVAL_SETCOVER) or compresses the graph, and checks the stopping
//     condition,
//   - on the last iteration, writes the updated/compressed graph if
//     parameters->compress is set.
//
// Iterations stop early when no substructures are found, when no positive
// examples remain (EVAL_SETCOVER), or when the positive graph has no edges
// left.
//
// Returns 0.
int main(int argc, char **argv)
{
   struct tms tmsstart, tmsend;
   clock_t startTime, endTime;
   static long clktck = 0;
   time_t iterationStartTime;
   time_t iterationEndTime;
   SubList *subList;
   Substructure *normSub = NULL;
   Parameters *parameters;
   FILE *outputFile;
   ULONG iteration;
   BOOLEAN done;

   clktck = sysconf(_SC_CLK_TCK);
   startTime = times(&tmsstart);
   printf("GBAD %s\n\n", GBAD_VERSION);
   parameters = GetParameters(argc, argv);

   // compress positive graphs with predefined subs, if given
   if (parameters->numPreSubs > 0)
      CompressWithPredefinedSubs(parameters);

   PrintParameters(parameters);

   if (parameters->iterations > 1)
      printf("----- Iteration 1 -----\n\n");

   iteration = 1;
   parameters->currentIteration = iteration;
   done = FALSE;

   while ((iteration <= parameters->iterations) && (!done))
   {
      iterationStartTime = time(NULL);
      if (iteration > 1)
         printf("----- Iteration %lu -----\n\n", iteration);

      printf("%lu positive graphs: %lu vertices, %lu edges",
             parameters->numPosEgs, parameters->posGraph->numVertices,
             parameters->posGraph->numEdges);

      if (parameters->evalMethod == EVAL_MDL)
         printf(", %.0f bits\n", parameters->posGraphDL);
      else
         printf("\n");
      printf("%lu unique labels\n", parameters->labelList->numLabels);
      printf("\n");

      if ((parameters->prob) && (iteration > 1))
      {
         //
         // If the GBAD-P option was chosen, after the first iteration we
         // no longer care about minsize or maxsize (if the user specified
         // these parameters), as we are just dealing with single
         // extensions from the normative pattern - so restrict the search
         // to substructures composed of the normative pattern (SUB_) and
         // the single vertex extension.
         //
         parameters->minVertices = 1;
         parameters->maxVertices = 2;
      }
      //
      // If the user has specified a normative pattern, on the first iteration
      // need to save the top-N substructures, where N is what the user
      // specified with the -norm parameter.
      //
      ULONG saveNumBestSubs = parameters->numBestSubs;
      if ((iteration == 1) && (!parameters->noAnomalyDetection) &&
          (parameters->norm > parameters->numBestSubs))
         parameters->numBestSubs = parameters->norm;
      //
      // -prune is useful to get to the initial normative pattern, but
      // possibly detrimental to discovering anomalies... so, turn off
      // pruning (in case it was turned on), so that it is not used in
      // future iterations.
      //
      if ((parameters->prob) && (iteration > 1))
      {
         parameters->prune = FALSE;
      }

      subList = DiscoverSubs(parameters, iteration);

      //
      // Now that we have the best substructure(s), return the user
      // specified number of best substructures to its original value.
      //
      if (iteration == 1)
         parameters->numBestSubs = saveNumBestSubs;

      if (subList->head == NULL) 
      {
         done = TRUE;
         printf("No substructures found.\n\n");
      }
      else 
      {
         //
         // GBAD-MDL
         //
         if (parameters->mdl)
            GBAD_MDL(subList,parameters);

         //
         // GBAD-MPS
         //
         if (parameters->mps)
         {
            GBAD_MPS(subList,parameters);
         }

         //
         // GBAD-P
         //
         if (parameters->prob)
         {
            normSub = GBAD_P(subList,iteration,parameters);
         }

         // write output to stdout
         if (parameters->outputLevel > 1) 
         {
            printf("\nBest %lu substructures:\n\n", CountSubs (subList));
            PrintSubList(subList, parameters);
         } 
         else 
         {
            printf("\nBest substructure: ");
            if ((CountSubs(subList) > 0) && (subList->head->sub != NULL))
               PrintSub(subList->head->sub, parameters);
            else
               printf("None.");
            printf("\n\n");
         }

         // write machine-readable output to file, if given
         if (parameters->outputToFile) 
         {
            outputFile = fopen(parameters->outFileName, "a");
            if (outputFile == NULL) 
            {
               printf("WARNING: unable to write to output file %s,",
                      parameters->outFileName);
               printf("disabling\n");
               parameters->outputToFile = FALSE;
            }
            else
            {
               // Only write and close when the file was actually opened;
               // passing a NULL stream to fclose (and to the writer) is
               // undefined behavior.
               WriteGraphToFile(outputFile, subList->head->sub->definition,
                                parameters->labelList, 0, 0,
                                subList->head->sub->definition->numVertices,
                                TRUE);
               fclose(outputFile);
            }
         }

         if (iteration < parameters->iterations) 
         {                                    // Another iteration?
            if (parameters->evalMethod == EVAL_SETCOVER) 
            {
               printf("Removing positive examples covered by");
               printf(" best substructure.\n\n");
               RemovePosEgsCovered(subList->head->sub, parameters);
            } 
            else 
            {
               //
               // For the GBAD-P algorithm, multiple iterations will need
               // to be performed, and if it is the first iteration
               // AND the user has specified a different normative
               // pattern (other than the best one), we need to
               // use the substructure that was set above.
               //
               // NOTE(review): normSub is whatever GBAD_P returned above;
               // this path assumes it is non-NULL - confirm.
               //
               if ((iteration == 1) && (parameters->prob))
               {
                  printf("Compressing graph by best substructure (%lu):\n",
                         parameters->norm);
                  PrintSub(normSub,parameters);
                  printf("\n");
                  CompressFinalGraphs(normSub, parameters, 
                                      iteration, FALSE);
               } else
                  CompressFinalGraphs(subList->head->sub, parameters, 
                                      iteration, FALSE);
            }

            // check for stopping condition
            // if set-covering, then no more positive examples
            // if MDL or size, then positive graph contains no edges
            if (parameters->evalMethod == EVAL_SETCOVER) 
            {
               if (parameters->numPosEgs == 0) 
               {
                  done = TRUE;
                  printf("Ending iterations - ");
                  printf("all positive examples covered.\n\n");
               }
            } 
            else 
            {
               if (parameters->posGraph->numEdges == 0) 
               {
                  done = TRUE;
                  printf("Ending iterations - graph fully compressed.\n\n");
               }
            }
         }
         if ((iteration == parameters->iterations) && (parameters->compress))
         {
            if (parameters->evalMethod == EVAL_SETCOVER)
               WriteUpdatedGraphToFile(subList->head->sub, parameters);
            else 
               WriteCompressedGraphToFile(subList->head->sub, parameters,
                                          iteration);
         }
      }

      //
      // Need to store information regarding the current best substructure,
      // for use in future GBAD-P calculations. (The original had separate
      // but identical branches for iteration == 1 and iteration > 1;
      // iteration starts at 1, so one branch covers both.)
      //
      if ((parameters->prob) && (subList->head != NULL))
         parameters->numPreviousInstances = subList->head->sub->numInstances;

      FreeSubList(subList);
      if (parameters->iterations > 1) 
      {
         iterationEndTime = time(NULL);
         // time_t is not guaranteed to match %lu; cast explicitly
         printf("Elapsed time for iteration %lu = %lu seconds.\n\n",
                iteration,
                (unsigned long) (iterationEndTime - iterationStartTime));
      }
      iteration++;
      parameters->currentIteration = iteration;
   }

   FreeParameters(parameters);
   endTime = times(&tmsend);
   printf("\nGBAD done (elapsed CPU time = %7.2f seconds).\n",
          (endTime - startTime) / (double) clktck);
   return 0;
}