예제 #1
0
 /**
  * Read a plain-text shape definition file and append its records to shape_def.
  *
  * The file is parsed as a whitespace-separated stream of numbers, seven per
  * record, in the order: s, nx, ny, nz, cx, cy, cz. Every complete record is
  * appended to shape_def in that same order.
  *
  * A record is stored only when all seven of its values were parsed. Reading
  * stops at the first incomplete or malformed record, so a truncated file
  * never contributes a record filled up with stale values.
  *
  * @param filename  path of the file to read
  * @param shape_def vector the parsed values are appended to; its existing
  *                  contents are kept
  * @return shape_def.size() / 7 after reading, i.e. the number of 7-value
  *         records held by shape_def, or 0 if the file cannot be opened
  *         (a count of 0 is the error value, as in read_shapes_file())
  */
 unsigned int NumericFormFactor::read_shapes_file_dat(const char* filename, real_vec_t &shape_def) {
   std::ifstream f(filename);
   if(!f.is_open()) {
     std::cout << "Cannot open file " << filename << std::endl;
     return 0;
   } // if

   real_t s = 0.0, nx = 0.0, ny = 0.0, nz = 0.0, cx = 0.0, cy = 0.0, cz = 0.0;

   // the stream converts to false as soon as any one of the seven extractions
   // fails, so the body only ever sees fully parsed records
   while(f >> s >> nx >> ny >> nz >> cx >> cy >> cz) {
     shape_def.push_back(s);
     shape_def.push_back(nx);
     shape_def.push_back(ny);
     shape_def.push_back(nz);
     shape_def.push_back(cx);
     shape_def.push_back(cy);
     shape_def.push_back(cz);
   } // while

   // no explicit close(): the ifstream destructor releases the file
   return shape_def.size() / 7;
 } // NumericFormFactor::read_shapes_file_dat()
예제 #2
0
  /**
   * Function to read the shape definition input file in HDF5 format.
   */
  unsigned int NumericFormFactor::read_shapes_file(const char* filename,
//                          #ifndef __SSE3__
                            real_vec_t &shape_def
//                          #else
//                            #ifdef USE_GPU
//                              real_vec_t &shape_def
//                            #else
//                              real_t* &shape_def
//                            #endif
//                          #endif
                          ) {
    unsigned int num_triangles = 0;
    double* temp_shape_def = NULL;
  
    // TODO: shape definition is already in HigInput ...
    // utilize ...
    ShapeFileType type = get_shapes_file_format(filename);
    if(type == shape_file_data) {
      RawShapeReader temp(filename, temp_shape_def, num_triangles);
    } else if(type == shape_file_object) {
      ObjectShapeReader temp(filename, temp_shape_def, num_triangles);
    } else if(type == shape_file_hdf5) {
      #ifdef USE_PARALLEL_HDF5
        h5_shape_reader(filename, &temp_shape_def, &num_triangles);
      #else
        std::cerr << "error: use of parallel hdf5 format has not been enabled in your installation. "
                  << "Please reinstall with the support enabled." << std::endl;
        return false;
      #endif
    } else if(type == shape_file_null) {
      std::cerr << "error: shape definition file extension is null" << std::endl;
      return 0;
    } else if(type == shape_file_error) {
      std::cerr << "error: shape definition file format unknown" << std::endl;
      return 0;
    } else {
      std::cerr << "error: shape definition file format unknown" << std::endl;
      return 0;
    } // if-else

    #ifdef FF_NUM_GPU
      #ifndef KERNEL2
        for(unsigned int i = 0; i < num_triangles * 7; ++ i)
          shape_def.push_back((real_t)temp_shape_def[i]);
      #else // KERNEL2
        for(unsigned int i = 0, j = 0; i < num_triangles * T_PROP_SIZE_; ++ i) {
          if((i + 1) % T_PROP_SIZE_ == 0) shape_def.push_back((real_t) 0.0);  // padding
          else { shape_def.push_back((real_t)temp_shape_def[j]); ++ j; }
        } // for
      #endif // KERNEL2
    //#elif defined USE_MIC  // using MIC
    //  for(unsigned int i = 0; i < num_triangles * 7; ++ i)
    //    shape_def.push_back((real_t)temp_shape_def[i]);
    #else          // using CPU or MIC
//      #ifndef __SSE3__
        for(unsigned int i = 0, j = 0; i < num_triangles * CPU_T_PROP_SIZE_; ++ i) {
          if((i + 1) % CPU_T_PROP_SIZE_ == 0) shape_def.push_back((real_t) 0.0);  // padding
          else { shape_def.push_back((real_t)temp_shape_def[j]); ++ j; }
        } // for
/*      #else    // using SSE3, so store data differently: FOR CPU AND MIC (vectorization)
        #ifndef USE_MIC    // generic cpu version with SSE3 or AVX
          #ifdef INTEL_SB_AVX    // CPU version with AVX
            // group all 's', 'nx', 'ny', 'nz', 'x', 'y', 'z' together
            // for alignment at 32 bytes, make sure each of the 7 groups is padded
            // compute amount of padding
            // 32 bytes = 8 floats or 4 doubles. FIXME: assuming float only for now ...
            unsigned int padding = (8 - (num_triangles & 7)) & 7;
            unsigned int shape_size = (num_triangles + padding) * 7;
            shape_def = (real_t*) _mm_malloc(shape_size * sizeof(real_t), 32);
            if(shape_def == NULL) {
              std::cerr << "error: failed to allocate aligned memory for shape_def"
                    << std::endl;
              return 0;
            } // if
            memset(shape_def, 0, shape_size * sizeof(real_t));
            for(int i = 0; i < num_triangles; ++ i) {
              for(int j = 0; j < 7; ++ j) {
                shape_def[(num_triangles + padding) * j + i] = temp_shape_def[7 * i + j];
              } // for
            } // for
          #else        // CPU version with SSE3
            // group all 's', 'nx', 'ny', 'nz', 'x', 'y', 'z' together
            // for alignment at 16 bytes, make sure each of the 7 groups is padded
            // compute amount of padding
            // 16 bytes = 4 floats or 2 doubles. FIXME: assuming float only for now ...
            unsigned int padding = (4 - (num_triangles & 3)) & 3;
            unsigned int shape_size = (num_triangles + padding) * 7;
            shape_def = (real_t*) _mm_malloc(shape_size * sizeof(real_t), 16);
            if(shape_def == NULL) {
              std::cerr << "error: failed to allocate aligned memory for shape_def"
                    << std::endl;
              return 0;
            } // if
            memset(shape_def, 0, shape_size * sizeof(real_t));
            for(int i = 0; i < num_triangles; ++ i) {
              for(int j = 0; j < 7; ++ j) {
                shape_def[(num_triangles + padding) * j + i] = temp_shape_def[7 * i + j];
              } // for
            } // for
          #endif
        #else  // optimized for MIC only: AVX2, 64 byte alignments (512-bit vector registers)
            // FIXME: float only for now: 16 floats in one vector!
          unsigned int padding = (16 - (num_triangles & 15)) & 15;
          unsigned int shape_size = (num_triangles + padding) * 7;
          shape_def = (real_t*) _mm_malloc(shape_size * sizeof(real_t), 64);
          if(shape_def == NULL) {
            std::cerr << "error: failed to allocate aligned memory for shape_def"
                  << std::endl;
            return 0;
          } // if
          memset(shape_def, 0, shape_size * sizeof(real_t));
          for(int i = 0; i < num_triangles; ++ i) {
            for(int j = 0; j < 7; ++ j) {
              shape_def[(num_triangles + padding) * j + i] = temp_shape_def[7 * i + j];
            } // for
          } // for
          // TODO: try grouping 16 triangles together ...
          // that will give completely sequential memory access!
        #endif
      #endif // __SSE3__  */
    #endif // FF_NUM_GPU

    return num_triangles;
  } // NumericFormFactor::read_shapes_file()