/*
 * Build "dir/file" into buf (capacity buflen).
 * Dies instead of silently truncating or overflowing when the result
 * does not fit.
 */
static void cw_build_path( char *buf, size_t buflen,
                           const char *dir, const char *file ) {
  int written = snprintf( buf, buflen, "%s/%s", dir, file );
  if ( written < 0 || (size_t) written >= buflen )
    die( "count_wordvec.c: path name too long\n" );
}

/*
 * Entry point: builds a word co-occurrence matrix and writes it out.
 *
 * Expects exactly 16 option arguments, in this fixed order:
 *   -mdir DIR -matlab N -precontext N -postcontext N -rows N -columns N
 *   -col_labels_from_file N -col_label_file FILE
 * Any deviation prints the usage text and exits via usage_and_exit().
 *
 * Steps: read the stored model params/info from DIR, clamp the requested
 * row count to the model's, read the dictionary and the document count,
 * initialize row/column indices and the matrix, process the wordlist,
 * transform the matrix, write it (SVD format, optionally Matlab format),
 * and finally write the updated model params/info back to DIR.
 *
 * Every failure is fatal (reported through die()); on success the process
 * exits with EXIT_SUCCESS.
 */
int main(int argc, char *argv[]) {
  FILE *FptrNumDocs;
  ENVIRONMENT env;
  char *model_data_dir;
  char pathbuf[BUFSIZ];
  char *col_label_file;
  int write_matlab;
  int col_labels_from_file;

  int rows, columns;
  MODEL_PARAMS model_params;
  MODEL_INFO model_info;

  env.word_array = NULL;
  env.word_tree = NULL;

  /* Options are positional: each flag must sit at its fixed argv slot. */
  if ( argc != 17 ) usage_and_exit( argv[0], 1 );
  if ( strcmp( argv[1], "-mdir" ) != 0 ) usage_and_exit( argv[0], 1 );
  model_data_dir = argv[2];
  if ( strcmp( argv[3], "-matlab" ) != 0 ) usage_and_exit( argv[0], 1 );
  write_matlab = atoi( argv[4] );
  if ( strcmp( argv[5], "-precontext" ) != 0 ) usage_and_exit( argv[0], 1 );
  pre_context_size = atoi( argv[6] );
  if ( strcmp( argv[7], "-postcontext" ) != 0 ) usage_and_exit( argv[0], 1 );
  post_context_size = atoi( argv[8] );
  if ( strcmp( argv[9], "-rows" ) != 0 ) usage_and_exit( argv[0], 1 );
  rows = atoi( argv[10] );
  if ( strcmp( argv[11], "-columns" ) != 0 ) usage_and_exit( argv[0], 1 );
  columns = atoi( argv[12] );
  if ( strcmp( argv[13], "-col_labels_from_file" ) != 0 ) usage_and_exit( argv[0], 1 );
  col_labels_from_file = atoi( argv[14] );
  if ( strcmp( argv[15], "-col_label_file" ) != 0 ) usage_and_exit( argv[0], 1 );
  col_label_file = argv[16];

  fprintf( stderr, "model data dir is \"%s\".\n", model_data_dir );

  /** Read in current model params **/
  cw_build_path( pathbuf, sizeof pathbuf, model_data_dir, MODEL_PARAMS_BIN_FILE );
  if ( !read_model_params( pathbuf, &model_params )) {
    die( "count_wordvec.c: couldn't read model data file\n" );
  }

  cw_build_path( pathbuf, sizeof pathbuf, model_data_dir, MODEL_INFO_BIN_FILE );
  if ( !read_model_info( pathbuf, &model_info )) {
    die( "count_wordvec.c: couldn't read model info file\n" );
  }

  /* Use the smaller of the requested and the stored row count, and keep
     the model params in agreement with whatever is actually used. */
  if (model_params.rows < rows) {
    rows = model_params.rows;
  } else {
    model_params.rows = rows;
  }

  printf("count_wordvec.c: looking for %d rows\n", rows);
  printf("which had better match %d\n", model_params.rows);
  model_info.columns = columns;
  model_info.col_labels_from_file = col_labels_from_file;
  model_info.pre_context_size = pre_context_size;
  model_info.post_context_size = post_context_size;
  model_info.blocksize = BLOCKSIZE;
  model_info.start_columns = START_COLUMNS;

  message( "Reading the dictionary... ");
  if( !read_dictionary( &(env.word_array), &(env.word_tree), model_data_dir ))
    die( "count_wordvec.c: Can't read the dictionary.\n");

  /*** read number of documents from file ***/

  cw_build_path( pathbuf, sizeof pathbuf, model_data_dir, FNUM_FILE );
  if ( !my_fopen( &FptrNumDocs, pathbuf, "r" ))
    die( "couldn't open filenames file" );

  /* fscanf returns EOF (non-zero) on an empty/unreadable file, so test for
     exactly one successful conversion rather than for a zero result. */
  if( fscanf( FptrNumDocs, "%d", &numDocs ) != 1 )
    die( "can't read numDocs" );

  if( !my_fclose( &FptrNumDocs ))
    die( "couldn't close numDocs file" );
  /*****/

  /* Set some initial values in the matrix, arrays etc. */
  if( !initialize_row_indices( env.word_array, &(env.row_indices),
			       rows ))
    die( "Couldn't initialize row indices.\n");
  if( !initialize_column_indices( env.word_array, &(env.col_indices),
				  columns, col_labels_from_file, col_label_file, &(env.word_tree) ))
    die( "Couldn't initialize column indices.\n");

  /* Allocate memory and set everything to zero.
     Defined in matrix.h */
  if( !initialize_matrix( (MATRIX_TYPE***) &(env.matrix), rows, columns))
    die( "Can't initialize matrix.\n");

  /* Go through the wordlist, applying process_region to all regions. */
  fprintf( stderr, "model data dir is \"%s\".\n", model_data_dir );
  fprintf( stderr, "count_wordvec.c: about to call process_wordlist\n" );
  if( !process_wordlist( is_target, advance_target,
			 set_region_in, set_region_out,
			 process_region , &env, model_data_dir))
    die( "Couldn't process wordlist.\n");

  /* Perform some conversion on the matrix.
     E.g. some kind of normalization.  We traditionally take the square root
     of all entries. */
  if( !transform_matrix( (MATRIX_TYPE **) (env.matrix), rows, columns))
    die( "Couldn't transform matrix.\n");

  /* Write the co-occurrence matrix. */
  message( "Writing the co-occurrence matrix.\n");

  if( !write_matrix_svd((MATRIX_TYPE **) (env.matrix),
                         rows, columns, model_data_dir ))
    die( "count_wordvec.c: couldn't write co-occurrence "
	 "matrix in SVD input format.\n" );

  if ( write_matlab ) {
    if ( !write_matrix_matlab( (MATRIX_TYPE **)(env.matrix),
			       rows, columns, model_data_dir ))
      die( "count_wordvec.c: couldn't write co-occurrence "
	   "matrix in Matlab input format.\n" );

  }

  /* Persist the (possibly adjusted) params and info next to the matrix. */
  cw_build_path( pathbuf, sizeof pathbuf, model_data_dir, MODEL_PARAMS_BIN_FILE );
  if ( !write_model_params( pathbuf, &model_params )) {
    die( "count_wordvec.c: couldn't write model params file\n" );
  }

  cw_build_path( pathbuf, sizeof pathbuf, model_data_dir, MODEL_INFO_BIN_FILE );
  if ( !write_model_info( pathbuf, &model_info )) {
    die( "count_wordvec.c: couldn't write model info file\n" );
  }

  exit( EXIT_SUCCESS);
}
Example #2
0
/*
 * Entry point of the CUDA MD5 wordlist cracker.
 *
 * Usage: PROGRAM WORDLIST_FILE MD5_HASH
 *
 * Opens the wordlist, selects a CUDA device, parses the 32-character hex
 * hash into four 32-bit words, allocates device/host buffers, and then
 * repeatedly: packs padded words (64 bytes each) into the host buffer,
 * copies the batch to the device, runs md5_calculate(), and copies the
 * stats structure back to see whether a match was found.
 *
 * Returns -1 on any setup or transfer error, 0 otherwise (whether or not a
 * matching word was found; the outcome is reported on stdout).
 */
int main(int argc, char **argv) {
    char *output;
    int x;
#ifdef DEBUG
    int y;
#endif
    struct cuda_device device;
    int available_words = 1;
    int current_words = 0;
    struct wordlist_file file;
    char input_hash[4][9];

    print_info();

    if (argc != ARG_COUNT) {
        printf("Usage: %s WORDLIST_FILE MD5_HASH\n", argv[0]);
        return -1;
    }

    if (process_wordlist(argv[ARG_WORDLIST], &file) == -1) {
        printf("Error Opening Wordlist File: %s\n", argv[ARG_WORDLIST]);
        return -1;
    }

    if (read_wordlist(&file) == 0) {
        printf("No valid passwords in the wordlist file: %s\n", argv[ARG_WORDLIST]);
        return -1;
    }

    // first things first, we need to select our CUDA device

    if (get_cuda_device(&device) == -1) {
        printf("No Cuda Device Installed\n");
        return -1;
    }

    // we now need to calculate the optimal amount of threads to use for this card

    calculate_cuda_params(&device);

    // now we input our target hash

    if (strlen(argv[ARG_MD5]) != 32) {
        printf("Not a valid MD5 Hash (should be 32 bytes and only Hex Chars\n");
        return -1;
    }

    // we split the input hash into 4 blocks of 8 hex characters each; the
    // memset guarantees every 9-byte slot stays NUL-terminated after strncpy

    memset(input_hash, 0, sizeof(input_hash));

    for(x=0; x < 4; x++) {
        strncpy(input_hash[x], argv[ARG_MD5] + (x * 8), 8);
        device.target_hash[x] = htonl(_httoi(input_hash[x]));
    }

    // allocate global memory for use on device
    if (cudaMalloc(&device.device_global_memory, device.device_global_memory_len) != CUDA_SUCCESS) {
        printf("Error allocating memory on device (global memory)\n");
        return -1;
    }

    // allocate the 'stats' that will indicate if we are successful in cracking
    if (cudaMalloc(&device.device_stats_memory, sizeof(struct device_stats)) != CUDA_SUCCESS) {
        printf("Error allocating memory on device (stats memory)\n");
        return -1;
    }

    // allocate debug memory if required
    if (cudaMalloc(&device.device_debug_memory, device.device_global_memory_len) != CUDA_SUCCESS) {
        printf("Error allocating memory on device (debug memory)\n");
        return -1;
    }

    // make sure the stats are clear on the device
    if (cudaMemset(device.device_stats_memory, 0, sizeof(struct device_stats)) != CUDA_SUCCESS) {
        printf("Error Clearing Stats on device\n");
        return -1;
    }

    // this is our host memory that we will copy to the graphics card
    if ((device.host_memory = malloc(device.device_global_memory_len)) == NULL) {
        printf("Error allocating memory on host\n");
        return -1;
    }

    // put our target hash into the GPU constant memory as this will not change (and we can't spare shared memory for speed)
    // NOTE(review): addressing the symbol by its string name is only accepted by
    // old CUDA toolkits; newer ones require the symbol itself -- confirm the
    // toolkit version this is built with before upgrading.
    if (cudaMemcpyToSymbol("target_hash", device.target_hash, 16, 0, cudaMemcpyHostToDevice) != CUDA_SUCCESS) {
        printf("Error initalizing constants\n");
        return -1;
    }

#ifdef BENCHMARK
    // these will be used to benchmark
    int counter = 0;
    struct timeval start, end;

    gettimeofday(&start, NULL);
#endif

    while(available_words) {
        memset(device.host_memory, 0, device.device_global_memory_len);

        // fill the host buffer with as many 64-byte padded words as fit, or
        // until the NULL entry that terminates the current word batch
        for(x=0; x < (device.device_global_memory_len / 64) && file.words[current_words] != (char *)0; x++, current_words++) {
#ifdef BENCHMARK
            counter++;		// increment counter for this word
#endif
            output = md5_pad(file.words[current_words]);
            memcpy(device.host_memory + (x * 64), output, 64);
        }

        if (file.words[current_words] == (char *)0) {
            // read some more words !
            current_words = 0;
            if (!read_wordlist(&file)) {
                // no more words available
                available_words = 0;
                // we continue as we want to flush the cache !
            }
        }


        // now we need to transfer the MD5 hashes to the graphics card for preperation

        if (cudaMemcpy(device.device_global_memory, device.host_memory, device.device_global_memory_len, cudaMemcpyHostToDevice) != CUDA_SUCCESS) {
            printf("Error Copying Words to GPU\n");
            return -1;
        }

        md5_calculate(&device);		// launch the kernel of the CUDA device

        if (cudaMemcpy(&device.stats, device.device_stats_memory, sizeof(struct device_stats), cudaMemcpyDeviceToHost) != CUDA_SUCCESS) {
            printf("Error Copying STATS from the GPU\n");
            return -1;
        }


#ifdef DEBUG
        // For debug, we will receive the hashes for verification
        memset(device.host_memory, 0, device.device_global_memory_len);
        if (cudaMemcpy(device.host_memory, device.device_debug_memory, device.device_global_memory_len, cudaMemcpyDeviceToHost) != CUDA_SUCCESS) {
            printf("Error Copying debug hashes from the GPU\n");
            return -1;
        }

        // NOTE(review): cudaThreadSynchronize is deprecated in favour of
        // cudaDeviceSynchronize in current toolkits.
        cudaThreadSynchronize();

        // prints out the debug hash'es
        printf("MD5 registers:\n\n");
        unsigned int *m = (unsigned int *)device.host_memory;
        for(y=0; y <= (device.max_blocks * device.max_threads); y++) {
            printf("------ [%d] -------\n", y);
            printf("A: %08x\n", m[(y * 4) + 0]);
            printf("B: %08x\n", m[(y * 4) + 1]);
            printf("C: %08x\n", m[(y * 4) + 2]);
            printf("D: %08x\n", m[(y * 4) + 3]);
            printf("-------------------\n\n");
        }
#endif

        if (device.stats.hash_found == 1) {
            printf("WORD FOUND: [%s]\n", md5_unpad(device.stats.word));
            break;
        }
    }

    if (device.stats.hash_found != 1) {
        printf("No word could be found for the provided MD5 hash\n");
    }

#ifdef BENCHMARK
    gettimeofday(&end, NULL);
    // do the arithmetic in long long so large tv_sec values cannot overflow
    long long elapsed_usec = ((long long)end.tv_sec * 1000000LL + (long long)end.tv_usec)
                           - ((long long)start.tv_sec * 1000000LL + (long long)start.tv_usec);
    // guard against a zero elapsed time on very short runs
    long long words_per_second = (elapsed_usec > 0)
                               ? ((long long)counter * 1000000LL) / elapsed_usec
                               : 0LL;
    printf("Time taken to check %d hashes: %f seconds\n", counter, (double)elapsed_usec / 1000000.0);
    printf("Words per second: %lld\n", words_per_second);
#endif

    return 0;
}