template <typename Dtype>
void hdf5_load_nd_dataset_helper(
    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
    Blob<Dtype>* blob) {
  // Verify that the dataset exists.
  CHECK(H5LTfind_dataset(file_id, dataset_name_))
      << "Failed to find HDF5 dataset " << dataset_name_;
  // Verify that the number of dimensions is in the accepted range.
  herr_t status;
  int ndims;
  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
  CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_;
  CHECK_GE(ndims, min_dim);
  CHECK_LE(ndims, max_dim);
  // Verify that the data format is what we expect: float or double.
  std::vector<hsize_t> dims(ndims);
  H5T_class_t class_;
  status = H5LTget_dataset_info(
      file_id, dataset_name_, dims.data(), &class_, NULL);
  CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_;
  CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data";

  vector<int> blob_dims(dims.size());
  for (int i = 0; i < dims.size(); ++i) {
    blob_dims[i] = dims[i];
  }
  blob->Reshape(blob_dims);
}
template <typename Dtype>
void hdf5_load_nd_dataset_helper(
    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
    Blob<Dtype>* blob) {
  // Verify that the dataset exists.
  // REVIEW ktran: this check doesn't work for nested dataset names
  ////CHECK(H5LTfind_dataset(file_id, dataset_name_))
  ////    << "Failed to find HDF5 dataset " << dataset_name_;

  // Verify that the number of dimensions is in the accepted range.
  herr_t status;
  int ndims;
  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
  CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_;
  CHECK_GE(ndims, min_dim);
  CHECK_LE(ndims, max_dim);

  // Verify that the data format is what we expect: float or double.
  std::vector<hsize_t> dims(ndims);
  H5T_class_t class_;
  status = H5LTget_dataset_info(
      file_id, dataset_name_, dims.data(), &class_, NULL);
  CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_;
  switch (class_) {
  case H5T_FLOAT: {
    LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_FLOAT";
    break;
  }
  case H5T_INTEGER: {
    LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_INTEGER";
    break;
  }
  case H5T_TIME:
    LOG(FATAL) << "Unsupported datatype class: H5T_TIME";
  case H5T_STRING:
    LOG(FATAL) << "Unsupported datatype class: H5T_STRING";
  case H5T_BITFIELD:
    LOG(FATAL) << "Unsupported datatype class: H5T_BITFIELD";
  case H5T_OPAQUE:
    LOG(FATAL) << "Unsupported datatype class: H5T_OPAQUE";
  case H5T_COMPOUND:
    LOG(FATAL) << "Unsupported datatype class: H5T_COMPOUND";
  case H5T_REFERENCE:
    LOG(FATAL) << "Unsupported datatype class: H5T_REFERENCE";
  case H5T_ENUM:
    LOG(FATAL) << "Unsupported datatype class: H5T_ENUM";
  case H5T_VLEN:
    LOG(FATAL) << "Unsupported datatype class: H5T_VLEN";
  case H5T_ARRAY:
    LOG(FATAL) << "Unsupported datatype class: H5T_ARRAY";
  default:
    LOG(FATAL) << "Datatype class unknown";
  }

  vector<int> blob_dims(dims.size());
  for (int i = 0; i < dims.size(); ++i) {
    blob_dims[i] = dims[i];
  }
  blob->Reshape(blob_dims);
}
template <typename Dtype>
void hdf5_load_nd_dataset_helper(
    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
    Blob<Dtype>* blob) {
  // Verify that the dataset exists.
  CHECK(H5LTfind_dataset(file_id, dataset_name_))
      << "Failed to find HDF5 dataset " << dataset_name_;
  // Verify that the number of dimensions is in the accepted range.
  herr_t status;
  int ndims;
  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
  CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_;
  CHECK_GE(ndims, min_dim);
  CHECK_LE(ndims, max_dim);
  // Verify that the data format is what we expect: float or double.
  std::vector<hsize_t> dims(ndims);
  H5T_class_t class_;
  status = H5LTget_dataset_info(
      file_id, dataset_name_, dims.data(), &class_, NULL);
  CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_;
  // The blocks around the "LOG" macros are there to avoid "initialization of
  // 'occurrences_??' is skipped by case label" errors on MSVC.
  switch (class_) {
  case H5T_FLOAT: {
    LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_FLOAT";
    break;
  }
  case H5T_INTEGER: {
    LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_INTEGER";
    break;
  }
  case H5T_TIME: {
    LOG(FATAL) << "Unsupported datatype class: H5T_TIME";
  }
  case H5T_STRING: {
    LOG(FATAL) << "Unsupported datatype class: H5T_STRING";
  }
  case H5T_BITFIELD: {
    LOG(FATAL) << "Unsupported datatype class: H5T_BITFIELD";
  }
  case H5T_OPAQUE: {
    LOG(FATAL) << "Unsupported datatype class: H5T_OPAQUE";
  }
  case H5T_COMPOUND: {
    LOG(FATAL) << "Unsupported datatype class: H5T_COMPOUND";
  }
  case H5T_REFERENCE: {
    LOG(FATAL) << "Unsupported datatype class: H5T_REFERENCE";
  }
  case H5T_ENUM: {
    LOG(FATAL) << "Unsupported datatype class: H5T_ENUM";
  }
  case H5T_VLEN: {
    LOG(FATAL) << "Unsupported datatype class: H5T_VLEN";
  }
  case H5T_ARRAY: {
    LOG(FATAL) << "Unsupported datatype class: H5T_ARRAY";
  }
  default: {
    LOG(FATAL) << "Datatype class unknown";
  }
  }

  vector<int> blob_dims(dims.size());
  for (int i = 0; i < dims.size(); ++i) {
    blob_dims[i] = dims[i];
  }
  blob->Reshape(blob_dims);
}
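// The helper above only validates the dataset and reshapes the blob; the
// actual read happens in the templated hdf5_load_nd_dataset<Dtype>() wrapper
// that the loader below calls. That wrapper is not shown in this file, so the
// following is only a minimal sketch of one, assuming the standard HDF5 Lite
// readers H5LTread_dataset_float / H5LTread_dataset_double and a primary
// template declared elsewhere as:
//   template <typename Dtype>
//   void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_,
//       int min_dim, int max_dim, Blob<Dtype>* blob);

template <>
void hdf5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
    int min_dim, int max_dim, Blob<float>* blob) {
  // Validate and size the blob, then fill it with the float data.
  hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
  herr_t status = H5LTread_dataset_float(
      file_id, dataset_name_, blob->mutable_cpu_data());
  CHECK_GE(status, 0) << "Failed to read float dataset " << dataset_name_;
}

template <>
void hdf5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
    int min_dim, int max_dim, Blob<double>* blob) {
  // Validate and size the blob, then fill it with the double data.
  hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
  herr_t status = H5LTread_dataset_double(
      file_id, dataset_name_, blob->mutable_cpu_data());
  CHECK_GE(status, 0) << "Failed to read double dataset " << dataset_name_;
}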
template <typename Dtype>
void HDF5GeneralDataLayer<Dtype>::LoadGeneralHDF5FileData(const char* filename) {
  DLOG(INFO) << "Loading general HDF5 file: " << filename;
  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file_id < 0) {
    LOG(FATAL) << "Failed opening HDF5 file: " << filename;
  }

  HDF5GeneralDataParameter data_param =
      this->layer_param_.hdf5_general_data_param();
  int fieldNum = data_param.field_size();
  hdf_blobs_.resize(fieldNum);

  const int MIN_DATA_DIM = 1;
  const int MAX_DATA_DIM = 4;

  for (int i = 0; i < fieldNum; ++i) {
    // LOG(INFO) << "Data type: " << data_param.datatype(i).data();
    if (i < data_param.datatype_size() &&
        strcmp(data_param.datatype(i).data(), "int8") == 0) {
      // The generic hdf5 io helpers only handle float/double data, so the
      // HDF5 calls are inlined here to read integer datasets.
      const char* dataset_name_ = data_param.field(i).data();
      hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());
      CHECK(H5LTfind_dataset(file_id, dataset_name_))
          << "Failed to find HDF5 dataset " << dataset_name_;
      // Verify that the number of dimensions is in the accepted range.
      herr_t status;
      int ndims;
      status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
      CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_;
      CHECK_GE(ndims, MIN_DATA_DIM);
      CHECK_LE(ndims, MAX_DATA_DIM);
      // Verify that the data format is what we expect: int8.
      std::vector<hsize_t> dims(ndims);
      H5T_class_t class_;
      status = H5LTget_dataset_info(
          file_id, dataset_name_, dims.data(), &class_, NULL);
      CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_;
      CHECK_EQ(class_, H5T_INTEGER) << "Expected integer data";

      vector<int> blob_dims(dims.size());
      for (int j = 0; j < dims.size(); ++j) {
        blob_dims[j] = dims[j];
      }
      hdf_blobs_[i]->Reshape(blob_dims);

      // Read the integer data into a temporary buffer, then cast it into the
      // blob's Dtype storage.
      DLOG(INFO) << "Allocating temporary int buffer of size "
                 << hdf_blobs_[i]->count();
      int* buffer_data = new int[hdf_blobs_[i]->count()];
      status = H5LTread_dataset_int(file_id, dataset_name_, buffer_data);
      CHECK_GE(status, 0) << "Failed to read int8 dataset " << dataset_name_;
      Dtype* target_data = hdf_blobs_[i]->mutable_cpu_data();
      for (int j = 0; j < hdf_blobs_[i]->count(); ++j) {
        target_data[j] = Dtype(buffer_data[j]);
      }
      delete[] buffer_data;
    } else {
      // The dataset uses the default float32 datatype.
      hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());
      hdf5_load_nd_dataset(file_id, data_param.field(i).data(),
          MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get());
    }
  }

  herr_t status = H5Fclose(file_id);
  CHECK_GE(status, 0) << "Failed to close HDF5 file " << filename;

  // All fields must hold the same number of samples.
  for (int i = 1; i < fieldNum; ++i) {
    CHECK_EQ(hdf_blobs_[0]->num(), hdf_blobs_[i]->num());
  }

  data_permutation_.clear();
  data_permutation_.resize(hdf_blobs_[0]->shape(0));
  for (int i = 0; i < hdf_blobs_[0]->shape(0); i++)
    data_permutation_[i] = i;
  // TODO: support data shuffling.
  // LOG(INFO) << "Successfully loaded " << data_blob_.num() << " rows";
}