/*
 * Build an ext4 filesystem image in memory and write it out to fd.
 *
 * Any field of the global `info` that is still unset (<= 0, 0 or NULL,
 * depending on the field) is filled in with a computed default before the
 * filesystem is laid out.
 *
 * fd             - target descriptor; passed to get_file_size(),
 *                  wipe_block_device() and write_ext4_image().
 * directory      - source tree handed to build_directory_structure(); when
 *                  NULL (and always under USE_MINGW) a default directory
 *                  structure is built instead.
 * mountpoint     - passed to MTK_add_mountpoint() and
 *                  build_directory_structure(); with HAVE_SELINUX it is also
 *                  the path looked up for the root inode label. It is
 *                  dereferenced when sehnd is set, so it must not be NULL
 *                  in that case.
 * fs_config_func - forwarded unchanged to build_directory_structure().
 * gzip, sparse, crc - forwarded unchanged to write_ext4_image().
 * wipe           - when non-zero, wipe_block_device() is called before the
 *                  image is written.
 * init_itabs     - when non-zero, init_unused_inode_tables() is called.
 * sehnd          - SELinux label handle; may be NULL (no labelling).
 *
 * Returns 0 on success. Returns EXIT_FAILURE when the filesystem size cannot
 * be determined, or when control comes back through setjmp_env via longjmp().
 */
int make_ext4fs_internal(int fd, const char *directory,
		char *mountpoint, fs_config_func_t fs_config_func, int gzip,
		int sparse, int crc, int wipe, int init_itabs,
		struct selabel_handle *sehnd)
{
	u32 root_inode_num;
	u16 root_mode;

	if (setjmp(setjmp_env))
		return EXIT_FAILURE; /* Handle a call to longjmp() */

	if (info.len <= 0)
		info.len = get_file_size(fd);

	if (info.len <= 0) {
		fprintf(stderr, "Need size of filesystem\n");
		return EXIT_FAILURE;
	}

	if (info.block_size <= 0)
		info.block_size = compute_block_size();

	/* Round down the filesystem length to be a multiple of the block size */
	info.len &= ~((u64)info.block_size - 1);

	if (info.journal_blocks == 0)
		info.journal_blocks = compute_journal_blocks();

	if (info.no_journal == 0)
		info.feat_compat = EXT4_FEATURE_COMPAT_HAS_JOURNAL;
	else
		info.journal_blocks = 0;

	if (info.blocks_per_group <= 0)
		info.blocks_per_group = compute_blocks_per_group();

	if (info.inodes <= 0)
		info.inodes = compute_inodes();

	if (info.inode_size <= 0)
		info.inode_size = 256;

	if (info.label == NULL)
		info.label = "";

	info.inodes_per_group = compute_inodes_per_group();

	info.feat_compat |=
			EXT4_FEATURE_COMPAT_RESIZE_INODE;

	info.feat_ro_compat |=
			EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER |
			EXT4_FEATURE_RO_COMPAT_LARGE_FILE;

	info.feat_incompat |=
			EXT4_FEATURE_INCOMPAT_EXTENTS |
			EXT4_FEATURE_INCOMPAT_FILETYPE;

	info.bg_desc_reserve_blocks = compute_bg_desc_reserve_blocks();

	printf("Creating filesystem with parameters:\n");
	printf(" Size: %llu\n", info.len);
	printf(" Block size: %d\n", info.block_size);
	printf(" Blocks per group: %d\n", info.blocks_per_group);
	printf(" Inodes per group: %d\n", info.inodes_per_group);
	printf(" Inode size: %d\n", info.inode_size);
	printf(" Journal blocks: %d\n", info.journal_blocks);
	printf(" Label: %s\n", info.label);

	ext4_create_fs_aux_info();

	printf(" Blocks: %llu\n", aux_info.len_blocks);
	printf(" Block groups: %d\n", aux_info.groups);
	printf(" Reserved block group size: %d\n", info.bg_desc_reserve_blocks);

	info.sparse_file = sparse_file_new(info.block_size, info.len);

	block_allocator_init();

	ext4_fill_in_sb();
	MTK_add_mountpoint(aux_info.sb,mountpoint);

	if (reserve_inodes(0, 10) == EXT4_ALLOCATE_FAILED)
		error("failed to reserve first 10 inodes");

	if (info.feat_compat & EXT4_FEATURE_COMPAT_HAS_JOURNAL)
		ext4_create_journal_inode();

	if (info.feat_compat & EXT4_FEATURE_COMPAT_RESIZE_INODE)
		ext4_create_resize_inode();

#ifdef USE_MINGW
	// Windows needs only 'create an empty fs image' functionality
	assert(!directory);
	root_inode_num = build_default_directory_structure();
#else
	if (directory)
		root_inode_num = build_directory_structure(directory, mountpoint, 0,
				fs_config_func, sehnd);
	else
		root_inode_num = build_default_directory_structure();
#endif

	root_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
	inode_set_permissions(root_inode_num, root_mode, 0, 0, 0);

#ifdef HAVE_SELINUX
	if (sehnd) {
		char *sepath = NULL;
		char *secontext = NULL;

		/* The label lookup needs an absolute path: prefix '/' if missing. */
		if (mountpoint[0] == '/') {
			sepath = strdup(mountpoint);
		} else if (asprintf(&sepath, "/%s", mountpoint) < 0) {
			/*
			 * On failure asprintf() leaves the output pointer undefined,
			 * so reset it for the NULL test below to be meaningful.
			 */
			sepath = NULL;
		}
		if (!sepath)
			critical_error_errno("malloc");

		if (selabel_lookup(sehnd, &secontext, sepath, S_IFDIR) < 0) {
			error("cannot lookup security context for %s", sepath);
		}
		if (secontext) {
			printf("Labeling %s as %s\n", sepath, secontext);
			inode_set_selinux(root_inode_num, secontext);
		}
		free(sepath);
		freecon(secontext);
	}
#endif

	ext4_update_free();

	if (init_itabs)
		init_unused_inode_tables();

	ext4_queue_sb();

	printf("Created filesystem with %d/%d inodes and %d/%d blocks\n",
			aux_info.sb->s_inodes_count - aux_info.sb->s_free_inodes_count,
			aux_info.sb->s_inodes_count,
			aux_info.sb->s_blocks_count_lo - aux_info.sb->s_free_blocks_count_lo,
			aux_info.sb->s_blocks_count_lo);

	if (wipe)
		wipe_block_device(fd, info.len);

	write_ext4_image(fd, gzip, sparse, crc);

	sparse_file_destroy(info.sparse_file);
	info.sparse_file = NULL;

	return 0;
}
/*
 * Build an ext4 filesystem image from a source directory and write it to fd.
 *
 * Any field of the global `info` that is still unset (<= 0, 0 or NULL,
 * depending on the field) is filled in with a computed default before the
 * filesystem is laid out.
 *
 * fd              - target descriptor; passed to get_file_size(),
 *                   wipe_block_device() and write_ext4_image().
 * _directory      - source tree; required (NULL is rejected). A canonicalized
 *                   copy is made with canonicalize_rel_slashes() and released
 *                   with free() before returning.
 * fs_config_func  - forwarded unchanged to build_directory_structure().
 * gzip, sparse, crc - forwarded unchanged to write_ext4_image().
 * wipe            - when non-zero and WIPE_IS_SUPPORTED, the device is wiped
 *                   before the image is written.
 * verbose, fixed_time - forwarded unchanged to build_directory_structure().
 * block_list_file - optional stream; when non-NULL, one entry per saved block
 *                   allocation is written to it, with the leading source
 *                   directory stripped from the file name when it matches.
 *
 * Returns 0 on success. Returns EXIT_FAILURE when _directory is NULL, when
 * the filesystem size cannot be determined, or when control comes back
 * through setjmp_env via longjmp().
 */
int make_ext4fs_internal(int fd, const char *_directory,
		fs_config_func_t fs_config_func, int gzip,
		int sparse, int crc, int wipe,
		int verbose, time_t fixed_time,
		FILE* block_list_file)
{
	u32 root_inode_num;
	u16 root_mode;
	char *directory = NULL;

	if (setjmp(setjmp_env))
		return EXIT_FAILURE; /* Handle a call to longjmp() */

	if (_directory == NULL) {
		fprintf(stderr, "Need a source directory\n");
		return EXIT_FAILURE;
	}

	directory = canonicalize_rel_slashes(_directory);

	if (info.len <= 0)
		info.len = get_file_size(fd);

	if (info.len <= 0) {
		fprintf(stderr, "Need size of filesystem\n");
		/* Do not leak the canonicalized path on this early exit. */
		free(directory);
		return EXIT_FAILURE;
	}

	if (info.block_size <= 0)
		info.block_size = compute_block_size();

	/* Round down the filesystem length to be a multiple of the block size */
	info.len &= ~((u64)info.block_size - 1);

	if (info.journal_blocks == 0)
		info.journal_blocks = compute_journal_blocks();

	if (info.no_journal == 0)
		info.feat_compat = EXT4_FEATURE_COMPAT_HAS_JOURNAL;
	else
		info.journal_blocks = 0;

	if (info.blocks_per_group <= 0)
		info.blocks_per_group = compute_blocks_per_group();

	if (info.inodes <= 0)
		info.inodes = compute_inodes();

	if (info.inode_size <= 0)
		info.inode_size = 256;

	if (info.label == NULL)
		info.label = "";

	info.inodes_per_group = compute_inodes_per_group();

	info.feat_compat |=
			EXT4_FEATURE_COMPAT_RESIZE_INODE |
			EXT4_FEATURE_COMPAT_EXT_ATTR;

	info.feat_ro_compat |=
			EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER |
			EXT4_FEATURE_RO_COMPAT_LARGE_FILE |
			EXT4_FEATURE_RO_COMPAT_GDT_CSUM;

	info.feat_incompat |=
			EXT4_FEATURE_INCOMPAT_EXTENTS |
			EXT4_FEATURE_INCOMPAT_FILETYPE;

	info.bg_desc_reserve_blocks = compute_bg_desc_reserve_blocks();

	printf("Creating filesystem with parameters:\n");
	printf(" Size: %"PRIu64"\n", info.len);
	printf(" Block size: %d\n", info.block_size);
	printf(" Blocks per group: %d\n", info.blocks_per_group);
	printf(" Inodes per group: %d\n", info.inodes_per_group);
	printf(" Inode size: %d\n", info.inode_size);
	printf(" Journal blocks: %d\n", info.journal_blocks);
	printf(" Label: %s\n", info.label);

	ext4_create_fs_aux_info();

	printf(" Blocks: %"PRIu64"\n", aux_info.len_blocks);
	printf(" Block groups: %d\n", aux_info.groups);
	printf(" Reserved blocks: %"PRIu64"\n", (aux_info.len_blocks / 100) * info.reserve_pcnt);
	printf(" Reserved block group size: %d\n", info.bg_desc_reserve_blocks);

	ext4_sparse_file = sparse_file_new(info.block_size, info.len);

	block_allocator_init();

	ext4_fill_in_sb();

	if (reserve_inodes(0, 10) == EXT4_ALLOCATE_FAILED)
		error("failed to reserve first 10 inodes");

	if (info.feat_compat & EXT4_FEATURE_COMPAT_HAS_JOURNAL)
		ext4_create_journal_inode();

	if (info.feat_compat & EXT4_FEATURE_COMPAT_RESIZE_INODE)
		ext4_create_resize_inode();

	root_inode_num = build_directory_structure(directory, "", 0,
			fs_config_func, verbose, fixed_time);

	root_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
	inode_set_permissions(root_inode_num, root_mode, 0, 0, 0);

	ext4_update_free();

	ext4_queue_sb();

	if (block_list_file) {
		size_t dirlen = strlen(directory);
		struct block_allocation* p = get_saved_allocation_chain();
		while (p) {
			/* Strip the source-directory prefix from the reported name. */
			if (strncmp(p->filename, directory, dirlen) == 0) {
				fprintf(block_list_file, "%s", p->filename + dirlen);
			} else {
				fprintf(block_list_file, "%s", p->filename);
			}
			print_blocks(block_list_file, p);
			/* Fetch the successor before the node is released. */
			struct block_allocation* pn = p->next;
			free_alloc(p);
			p = pn;
		}
	}

	printf("Created filesystem with %d/%d inodes and %d/%d blocks\n",
			aux_info.sb->s_inodes_count - aux_info.sb->s_free_inodes_count,
			aux_info.sb->s_inodes_count,
			aux_info.sb->s_blocks_count_lo - aux_info.sb->s_free_blocks_count_lo,
			aux_info.sb->s_blocks_count_lo);

	if (wipe && WIPE_IS_SUPPORTED) {
		wipe_block_device(fd, info.len);
	}

	write_ext4_image(fd, gzip, sparse, crc);

	sparse_file_destroy(ext4_sparse_file);
	ext4_sparse_file = NULL;

	free(directory);

	return 0;
}
int make_ext4fs(const char *filename, const char *directory, char *mountpoint, int android, int gzip, int sparse) { u32 root_inode_num; u16 root_mode; if (info.len == 0) info.len = get_file_size(filename); if (info.len <= 0) { fprintf(stderr, "Need size of filesystem\n"); return EXIT_FAILURE; } if (info.block_size <= 0) info.block_size = compute_block_size(); if (info.journal_blocks == 0) info.journal_blocks = compute_journal_blocks(); if (info.no_journal == 0) info.feat_compat = EXT4_FEATURE_COMPAT_HAS_JOURNAL; else info.journal_blocks = 0; if (info.blocks_per_group <= 0) info.blocks_per_group = compute_blocks_per_group(); if (info.inodes <= 0) info.inodes = compute_inodes(); if (info.inode_size <= 0) info.inode_size = 256; if (info.label == NULL) info.label = ""; info.inodes_per_group = compute_inodes_per_group(); info.feat_compat |= EXT4_FEATURE_COMPAT_RESIZE_INODE; info.feat_ro_compat |= EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER | EXT4_FEATURE_RO_COMPAT_LARGE_FILE; info.feat_incompat |= EXT4_FEATURE_INCOMPAT_EXTENTS | EXT4_FEATURE_INCOMPAT_FILETYPE; printf("Creating filesystem with parameters:\n"); printf(" Size: %llu\n", info.len); printf(" Block size: %d\n", info.block_size); printf(" Blocks per group: %d\n", info.blocks_per_group); printf(" Inodes per group: %d\n", info.inodes_per_group); printf(" Inode size: %d\n", info.inode_size); printf(" Journal blocks: %d\n", info.journal_blocks); printf(" Label: %s\n", info.label); ext4_create_fs_aux_info(); printf(" Blocks: %llu\n", aux_info.len_blocks); printf(" Block groups: %d\n", aux_info.groups); printf(" Reserved block group size: %d\n", aux_info.bg_desc_reserve_blocks); block_allocator_init(); ext4_fill_in_sb(); if (reserve_inodes(0, 10) == EXT4_ALLOCATE_FAILED) error("failed to reserve first 10 inodes"); if (info.feat_compat & EXT4_FEATURE_COMPAT_HAS_JOURNAL) ext4_create_journal_inode(); if (info.feat_compat & EXT4_FEATURE_COMPAT_RESIZE_INODE) ext4_create_resize_inode(); if (directory) root_inode_num = 
build_directory_structure(directory, mountpoint, 0, android); else root_inode_num = build_default_directory_structure(); root_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; inode_set_permissions(root_inode_num, root_mode, 0, 0, 0); ext4_update_free(); printf("Created filesystem with %d/%d inodes and %d/%d blocks\n", aux_info.sb->s_inodes_count - aux_info.sb->s_free_inodes_count, aux_info.sb->s_inodes_count, aux_info.sb->s_blocks_count_lo - aux_info.sb->s_free_blocks_count_lo, aux_info.sb->s_blocks_count_lo); write_ext4_image(filename, gzip, sparse); return 0; }
/** * The main host function called from outside, as part of the API for a single node. */ unsigned int NumericFormFactorC::compute_form_factor(int rank, // #ifndef __SSE3__ real_vec_t &shape_def, // #else // real_t* shape_def, unsigned int num_triangles, // #endif complex_t* &ff, real_t* &qx, int nqx, real_t* &qy, int nqy, complex_t* &qz, int nqz, real_t* &rot, real_t& kernel_time, real_t& red_time, real_t& mem_time #ifdef FINDBLOCK , const int block_x, const int block_y, const int block_z, const int block_t #endif ) { double temp_mem_time = 0.0, total_mem_time = 0.0; #ifdef _OPENMP if(rank == 0) std::cout << "++ Number of OpenMP threads: " << omp_get_max_threads() << std::endl; #endif // #ifndef __SSE3__ unsigned int num_triangles = shape_def.size() / CPU_T_PROP_SIZE_; // #endif if(num_triangles < 1) return 0; // #ifdef INTEL_SB_AVX // unsigned int shape_padding = (32 - (num_triangles & 31)) & 31; // #elif defined __SSE3__ // unsigned int shape_padding = (16 - (num_triangles & 15)) & 15; // #endif //#ifndef FF_NUM_CPU_PADDING unsigned long int total_qpoints = nqx * nqy * nqz; unsigned long int host_mem_usage = ((unsigned long int) nqx + nqy) * sizeof(real_t) + nqz * sizeof(complex_t); //#else // padding to 16 bytes //const unsigned int PAD_LINE_ = 16; //unsigned int pad_x = 0; //if(nqx != 1) pad_x = (PAD_LINE_ - (nqx % PAD_LINE_)) % PAD_LINE_; //unsigned int pad_y = (PAD_LINE_ - (nqy % PAD_LINE_)) % PAD_LINE_; //unsigned int pad_z = (PAD_LINE_ - (nqz % PAD_LINE_)) % PAD_LINE_; //unsigned int pnqx = nqx + pad_x, pnqy = nqy + pad_y, pnqz = nqz + pad_z; //unsigned long int total_qpoints = pnqx * pnqy * pnqz; //unsigned long int host_mem_usage = ((unsigned long int) pnqx + pnqy) * sizeof(real_t) + // pnqz * sizeof(complex_t); //#endif // allocate memory for the final FF 3D matrix ff = new (std::nothrow) complex_t[total_qpoints]; // allocate and initialize to 0 memset(ff, 0, total_qpoints * sizeof(complex_t)); if(ff == NULL) { std::cerr << "Memory allocation failed 
for ff. Size = " << total_qpoints * sizeof(complex_t) << " b" << std::endl; return 0; } // if host_mem_usage += total_qpoints * sizeof(complex_t); //unsigned long int matrix_size = (unsigned long int) nqx * nqy * nqz * num_triangles; // do hyperblocking to use less memory unsigned int b_nqx = 0, b_nqy = 0, b_nqz = 0, b_num_triangles = 0; #ifndef FF_NUM_CPU_AUTOTUNE_HB compute_block_size(nqx, nqy, nqz, num_triangles, b_nqx, b_nqy, b_nqz, b_num_triangles #ifdef FINDBLOCK , block_x, block_y, block_z, block_t #endif ); #else std::cout << "-- Autotuning hyperblock size ... " << std::endl; double min_time_hb = 1000000.0; unsigned int min_b_nqx = 1, min_b_nqy = 1, min_b_nqz = 1, min_b_num_triangles = 1; woo::BoostChronoTimer at_kernel_timer, at_overhead_timer; at_overhead_timer.start(); complex_t* ff_temp; ff_temp = new (std::nothrow) complex_t[nqx * nqy * nqz]; for(int b_nqx_i = 1; b_nqx_i <= nqx; ++ b_nqx_i) { for(int b_nqy_i = 10; b_nqy_i <= nqy; b_nqy_i += 10) { for(int b_nqz_i = 10; b_nqz_i <= nqz; b_nqz_i += 10) { for(int b_nt_i = 10; b_nt_i <= num_triangles; b_nt_i += 10) { at_kernel_timer.start(); // compute the number of sub-blocks, along each of the 4 dimensions unsigned int nb_x = (unsigned int) ceil((float) nqx / b_nqx_i); unsigned int nb_y = (unsigned int) ceil((float) nqy / b_nqy_i); unsigned int nb_z = (unsigned int) ceil((float) nqz / b_nqz_i); unsigned int nb_t = (unsigned int) ceil((float) num_triangles / b_nt_i); unsigned int num_blocks = nb_x * nb_y * nb_z * nb_t; form_factor_kernel_fused_nqx1(qx, qy, qz, shape_def, b_nqx_i, b_nqy_i, b_nqz_i, b_nt_i, b_nqx_i, b_nqy_i, b_nqz_i, b_nt_i, nqx, nqy, nqz, num_triangles, 0, 0, 0, 0, rot, ff); at_kernel_timer.stop(); double curr_time = at_kernel_timer.elapsed_msec(); double tot_time = curr_time * num_blocks; std::cout << "## " << b_nqx_i << " x " << b_nqy_i << " x " << b_nqz_i << " x " << b_nt_i << "\t" << num_blocks << "\t:\t" << curr_time << "\t" << tot_time << std::endl; if(tot_time < min_time_hb) { 
min_time_hb = tot_time; min_b_nqx = b_nqx_i; min_b_nqy = b_nqy_i; min_b_nqz = b_nqz_i; min_b_num_triangles = b_nt_i; } // if } // for } // for } // for } // for delete[] ff_temp; at_overhead_timer.stop(); b_nqx = min_b_nqx; b_nqy = min_b_nqy; b_nqz = min_b_nqz; b_num_triangles = min_b_num_triangles; if(rank == 0) { std::cout << "## HBlock Autotuner overhead: " << at_overhead_timer.elapsed_msec() << " ms." << std::endl; } // if #endif unsigned long int blocked_3d_matrix_size = (unsigned long int) b_nqx * b_nqy * b_nqz; //size_t estimated_host_mem_need = host_mem_usage + blocked_matrix_size * sizeof(complex_t); //if(rank == 0) { // std::cout << "++ Estimated host memory need: " << (float) estimated_host_mem_need / 1024 / 1024 // << " MB" << std::endl; //} // if #ifndef FF_NUM_CPU_FUSED unsigned long int blocked_matrix_size = (unsigned long int) blocked_3d_matrix_size * b_num_triangles; host_mem_usage += blocked_matrix_size * sizeof(complex_t); complex_t *fq_buffer = new (std::nothrow) complex_t[blocked_matrix_size](); if(fq_buffer == NULL) { std::cerr << "Memory allocation failed for fq_buffer. 
blocked_matrix_size = " << blocked_matrix_size << std::endl << "Host memory usage = " << (float) host_mem_usage / 1024 / 1024 << " MB" << std::endl; delete[] ff; return 0; } // if #endif if(rank == 0) { std::cout << "++ Host memory usage: " << (float) host_mem_usage / 1024 / 1024 << " MB" << std::endl << std::flush; } // if // compute the number of sub-blocks, along each of the 4 dimensions // formulate loops over each dimension, to go over each sub block unsigned int nb_x = (unsigned int) ceil((float) nqx / b_nqx); unsigned int nb_y = (unsigned int) ceil((float) nqy / b_nqy); unsigned int nb_z = (unsigned int) ceil((float) nqz / b_nqz); unsigned int nb_t = (unsigned int) ceil((float) num_triangles / b_num_triangles); unsigned int curr_b_nqx = b_nqx, curr_b_nqy = b_nqy, curr_b_nqz = b_nqz; unsigned int curr_b_num_triangles = b_num_triangles; unsigned int num_blocks = nb_x * nb_y * nb_z * nb_t; #ifdef TIME_DETAIL_2 if(rank == 0) { std::cout << "++ Hyperblock size: " << b_nqx << " x " << b_nqy << " x " << b_nqz << " x " << b_num_triangles << std::endl; std::cout << "++ Number of decomposed Hblocks: " << num_blocks << " [" << nb_x << " x " << nb_y << " x " << nb_z << " x " << nb_t << "]" << std::endl; } // if #endif // TIME_DETAIL_2 unsigned int block_num = 0; #ifdef PROFILE_PAPI long long int papi_total_cycles = 0, papi_total_inst = 0, papi_total_flop = 0; double overall_ipc = 0.0; #endif if(rank == 0) std::cout << "-- Computing form factor on CPU ... 
" << std::flush; woo::BoostChronoTimer kernel_timer; kernel_timer.start(); // compute for each hyperblock curr_b_nqx = b_nqx; for(unsigned int ib_x = 0; ib_x < nb_x; ++ ib_x) { if(ib_x == nb_x - 1) curr_b_nqx = nqx - b_nqx * ib_x; curr_b_nqy = b_nqy; for(unsigned int ib_y = 0; ib_y < nb_y; ++ ib_y) { if(ib_y == nb_y - 1) curr_b_nqy = nqy - b_nqy * ib_y; curr_b_nqz = b_nqz; for(unsigned int ib_z = 0; ib_z < nb_z; ++ ib_z) { if(ib_z == nb_z - 1) curr_b_nqz = nqz - b_nqz * ib_z; curr_b_num_triangles = b_num_triangles; for(unsigned int ib_t = 0; ib_t < nb_t; ++ ib_t) { if(ib_t == nb_t - 1) curr_b_num_triangles = num_triangles - b_num_triangles * ib_t; #ifdef PROFILE_PAPI // PAPI_L1_DCM 0x80000000 No Level 1 data cache misses // PAPI_L1_ICM 0x80000001 No Level 1 instruction cache misses // PAPI_L2_DCM 0x80000002 No Level 2 data cache misses // PAPI_L2_ICM 0x80000003 No Level 2 instruction cache misses // PAPI_L1_TCM 0x80000006 Yes Level 1 cache misses // PAPI_L2_TCM 0x80000007 No Level 2 cache misses // PAPI_FPU_IDL 0x80000012 No Cycles floating point units are idle // PAPI_TLB_DM 0x80000014 No Data translation lookaside buffer misses // PAPI_TLB_IM 0x80000015 No Instruction translation lookaside buffer misses // PAPI_TLB_TL 0x80000016 Yes Total translation lookaside buffer misses // PAPI_STL_ICY 0x80000025 No Cycles with no instruction issue // PAPI_HW_INT 0x80000029 No Hardware interrupts // PAPI_BR_TKN 0x8000002c No Conditional branch instructions taken // PAPI_BR_MSP 0x8000002e No Conditional branch instructions mispredicted // PAPI_TOT_INS 0x80000032 No Instructions completed // PAPI_FP_INS 0x80000034 No Floating point instructions // PAPI_BR_INS 0x80000037 No Branch instructions // PAPI_VEC_INS 0x80000038 No Vector/SIMD instructions (could include integer) // PAPI_RES_STL 0x80000039 No Cycles stalled on any resource // PAPI_TOT_CYC 0x8000003b No Total cycles // PAPI_L1_DCH 0x8000003e Yes Level 1 data cache hits // PAPI_L2_DCH 0x8000003f Yes Level 2 data cache hits 
// PAPI_L1_DCA 0x80000040 No Level 1 data cache accesses // PAPI_L2_DCA 0x80000041 No Level 2 data cache accesses // PAPI_L1_ICH 0x80000049 Yes Level 1 instruction cache hits // PAPI_L2_ICH 0x8000004a No Level 2 instruction cache hits // PAPI_L1_ICA 0x8000004c No Level 1 instruction cache accesses // PAPI_L2_ICA 0x8000004d No Level 2 instruction cache accesses // PAPI_L1_ICR 0x8000004f No Level 1 instruction cache reads // PAPI_L1_TCH 0x80000055 Yes Level 1 total cache hits // PAPI_L2_TCH 0x80000056 Yes Level 2 total cache hits // PAPI_L1_TCA 0x80000058 Yes Level 1 total cache accesses // PAPI_L2_TCA 0x80000059 No Level 2 total cache accesses // PAPI_FML_INS 0x80000061 No Floating point multiply instructions // PAPI_FAD_INS 0x80000062 No Floating point add instructions // (Also includes subtract instructions) // PAPI_FDV_INS 0x80000063 No Floating point divide instructions // (Counts both divide and square root instructions) // PAPI_FSQ_INS 0x80000064 No Floating point square root instructions // (Counts both divide and square root instructions) // PAPI_FP_OPS 0x80000066 No Floating point operations // PAPI_SP_OPS 0x80000067 No Floating point operations; optimized to count // scaled single precision vector operations // PAPI_DP_OPS 0x80000068 No Floating point operations; optimized to count // scaled double precision vector operations int papi_events[3] = { PAPI_TOT_CYC, PAPI_TOT_INS, PAPI_FP_OPS }; //int papi_events[3] = { PAPI_FML_INS, PAPI_FAD_INS, PAPI_FDV_INS }; //int papi_events[3] = { PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS }; long long papi_counter_values[3]; PAPI_start_counters(papi_events, 3); #endif // call the main kernel #ifndef FF_NUM_CPU_FUSED // DO NOT USE THIS form_factor_kernel(qx, qy, qz, shape_def, curr_b_nqx, curr_b_nqy, curr_b_nqz, curr_b_num_triangles, b_nqx, b_nqy, b_nqz, b_num_triangles, ib_x, ib_y, ib_z, ib_t, fq_buffer); #else if(nqx == 1) { form_factor_kernel_fused_nqx1(qx, qy, qz, shape_def, //form_factor_kernel_fused_nqx1_unroll4(qx, qy, 
qz, shape_def, curr_b_nqx, curr_b_nqy, curr_b_nqz, curr_b_num_triangles, b_nqx, b_nqy, b_nqz, b_num_triangles, nqx, nqy, nqz, num_triangles, ib_x, ib_y, ib_z, ib_t, rot, ff); } else { // #ifdef __SSE3__ // if(rank == 0) // std::cout << "uh-oh: no SSE3 version!" << std::endl; // #else form_factor_kernel_fused_unroll4(qx, qy, qz, shape_def, curr_b_nqx, curr_b_nqy, curr_b_nqz, curr_b_num_triangles, b_nqx, b_nqy, b_nqz, b_num_triangles, nqx, nqy, nqz, num_triangles, ib_x, ib_y, ib_z, ib_t, rot, ff); // #endif // __SSE3__ } // if-else #endif #ifndef FF_NUM_CPU_FUSED // DO NOT USE THIS // call the reduction kernel reduction_kernel(curr_b_nqx, curr_b_nqy, curr_b_nqz, curr_b_num_triangles, blocked_matrix_size, b_nqx, b_nqy, b_nqz, num_triangles, nqx, nqy, nqz, ib_x, ib_y, ib_z, ib_t, fq_buffer, ff); #endif #ifdef PROFILE_PAPI PAPI_stop_counters(papi_counter_values, 3); papi_total_cycles += papi_counter_values[0]; papi_total_inst += papi_counter_values[1]; papi_total_flop += papi_counter_values[2]; #endif } // for ib_t } // for ib_z } // for ib_y } // for ib_x kernel_timer.stop(); kernel_time = kernel_timer.elapsed_msec(); #ifndef FF_NUM_CPU_FUSED delete[] fq_buffer; #endif if(rank == 0) std::cout << "done." << std::endl; #ifdef PROFILE_PAPI if(rank == 0) { std::cout << "++ PAPI_TOT_CYC: " << papi_total_cycles << std::endl; std::cout << "++ PAPI_TOT_INS: " << papi_total_inst << std::endl; std::cout << "++ PAPI_FP_OPS: " << papi_total_flop << std::endl; std::cout << "++ IPC: " << (double) papi_total_inst / papi_total_cycles << std::endl; } // if #endif return num_triangles; } // NumericFormFactorC::compute_form_factor()
/*
 * Build an ext4 filesystem image and write it to fd.
 *
 * Any field of the global `info` that is still unset (<= 0, 0 or NULL,
 * depending on the field) is filled in with a computed default before the
 * filesystem is laid out.
 *
 * fd                    - target descriptor; also probed with
 *                         is_block_device_fd().
 * _directory            - optional source tree; when NULL (and always under
 *                         USE_MINGW) a default directory structure is built.
 * _target_out_directory - optional; a canonicalized copy is forwarded to
 *                         build_directory_structure().
 * _mountpoint           - optional; NULL is treated as "". A canonicalized
 *                         copy is used for directory building, SELinux
 *                         lookup and the block list output.
 * fs_config_func        - forwarded unchanged to build_directory_structure().
 * gzip, sparse, crc     - forwarded unchanged to write_ext4_image(); none of
 *                         them may be set when fd is a block device.
 * wipe                  - when non-zero and WIPE_IS_SUPPORTED, the device is
 *                         wiped before the image is written.
 * real_uuid             - forwarded unchanged to ext4_fill_in_sb().
 * sehnd                 - SELinux label handle; may be NULL (no labelling).
 * verbose               - forwarded to build_directory_structure(); also
 *                         enables the "Labeling ..." message.
 * fixed_time            - forwarded unchanged to build_directory_structure().
 * block_list_file       - optional stream; when non-NULL, one entry per saved
 *                         block allocation is written to it, with a leading
 *                         source directory replaced by the mountpoint.
 *
 * All canonicalized path copies are released with free() before returning
 * through a normal return statement.
 *
 * Returns 0 on success. Returns EXIT_FAILURE for an invalid option
 * combination, when the filesystem size cannot be determined, or when
 * control comes back through setjmp_env via longjmp().
 */
int make_ext4fs_internal(int fd, const char *_directory, const char *_target_out_directory,
		const char *_mountpoint, fs_config_func_t fs_config_func, int gzip,
		int sparse, int crc, int wipe, int real_uuid,
		struct selabel_handle *sehnd, int verbose, time_t fixed_time,
		FILE* block_list_file)
{
	u32 root_inode_num;
	u16 root_mode;
	char *mountpoint;
	char *directory = NULL;
	char *target_out_directory = NULL;

	if (setjmp(setjmp_env))
		return EXIT_FAILURE; /* Handle a call to longjmp() */

	info.block_device = is_block_device_fd(fd);

	if (info.block_device && (sparse || gzip || crc)) {
		fprintf(stderr, "No sparse/gzip/crc allowed for block device\n");
		return EXIT_FAILURE;
	}

	if (_mountpoint == NULL) {
		mountpoint = strdup("");
	} else {
		mountpoint = canonicalize_abs_slashes(_mountpoint);
	}

	if (_directory) {
		directory = canonicalize_rel_slashes(_directory);
	}

	if (_target_out_directory) {
		target_out_directory = canonicalize_rel_slashes(_target_out_directory);
	}

	if (info.len <= 0)
		info.len = get_file_size(fd);

	if (info.len <= 0) {
		fprintf(stderr, "Need size of filesystem\n");
		/* Release the path copies made above; free(NULL) is a no-op. */
		free(mountpoint);
		free(directory);
		free(target_out_directory);
		return EXIT_FAILURE;
	}

	if (info.block_size <= 0)
		info.block_size = compute_block_size();

	/* Round down the filesystem length to be a multiple of the block size */
	info.len &= ~((u64)info.block_size - 1);

	if (info.journal_blocks == 0)
		info.journal_blocks = compute_journal_blocks();

	if (info.no_journal == 0)
		info.feat_compat = EXT4_FEATURE_COMPAT_HAS_JOURNAL;
	else
		info.journal_blocks = 0;

	if (info.blocks_per_group <= 0)
		info.blocks_per_group = compute_blocks_per_group();

	if (info.inodes <= 0)
		info.inodes = compute_inodes();

	if (info.inode_size <= 0)
		info.inode_size = 256;

	if (info.label == NULL)
		info.label = "";

	info.inodes_per_group = compute_inodes_per_group();

	info.feat_compat |=
			EXT4_FEATURE_COMPAT_RESIZE_INODE |
			EXT4_FEATURE_COMPAT_EXT_ATTR;

	info.feat_ro_compat |=
			EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER |
			EXT4_FEATURE_RO_COMPAT_LARGE_FILE |
			EXT4_FEATURE_RO_COMPAT_GDT_CSUM;

	info.feat_incompat |=
			EXT4_FEATURE_INCOMPAT_EXTENTS |
			EXT4_FEATURE_INCOMPAT_FILETYPE;

	info.bg_desc_reserve_blocks = compute_bg_desc_reserve_blocks();

	printf("Creating filesystem with parameters:\n");
	printf(" Size: %"PRIu64"\n", info.len);
	printf(" Block size: %d\n", info.block_size);
	printf(" Blocks per group: %d\n", info.blocks_per_group);
	printf(" Inodes per group: %d\n", info.inodes_per_group);
	printf(" Inode size: %d\n", info.inode_size);
	printf(" Journal blocks: %d\n", info.journal_blocks);
	printf(" Label: %s\n", info.label);

	ext4_create_fs_aux_info();

	printf(" Blocks: %"PRIu64"\n", aux_info.len_blocks);
	printf(" Block groups: %d\n", aux_info.groups);
	printf(" Reserved block group size: %d\n", info.bg_desc_reserve_blocks);

	ext4_sparse_file = sparse_file_new(info.block_size, info.len);

	block_allocator_init();

	ext4_fill_in_sb(real_uuid);

	if (reserve_inodes(0, 10) == EXT4_ALLOCATE_FAILED)
		error("failed to reserve first 10 inodes");

	if (info.feat_compat & EXT4_FEATURE_COMPAT_HAS_JOURNAL)
		ext4_create_journal_inode();

	if (info.feat_compat & EXT4_FEATURE_COMPAT_RESIZE_INODE)
		ext4_create_resize_inode();

#ifdef USE_MINGW
	// Windows needs only 'create an empty fs image' functionality
	assert(!directory);
	root_inode_num = build_default_directory_structure(mountpoint, sehnd);
#else
	if (directory)
		root_inode_num = build_directory_structure(directory, mountpoint, target_out_directory, 0,
			fs_config_func, sehnd, verbose, fixed_time);
	else
		root_inode_num = build_default_directory_structure(mountpoint, sehnd);
#endif

	root_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
	inode_set_permissions(root_inode_num, root_mode, 0, 0, 0);

#ifndef USE_MINGW
	if (sehnd) {
		char *secontext = NULL;

		if (selabel_lookup(sehnd, &secontext, mountpoint, S_IFDIR) < 0) {
			error("cannot lookup security context for %s", mountpoint);
		}
		if (secontext) {
			if (verbose) {
				printf("Labeling %s as %s\n", mountpoint, secontext);
			}
			inode_set_selinux(root_inode_num, secontext);
		}
		freecon(secontext);
	}
#endif

	ext4_update_free();

	if (block_list_file) {
		size_t dirlen = directory ? strlen(directory) : 0;
		struct block_allocation* p = get_saved_allocation_chain();
		while (p) {
			if (directory && strncmp(p->filename, directory, dirlen) == 0) {
				// substitute mountpoint for the leading directory in the filename, in the output file
				fprintf(block_list_file, "%s%s", mountpoint, p->filename + dirlen);
			} else {
				fprintf(block_list_file, "%s", p->filename);
			}
			print_blocks(block_list_file, p);
			/* Fetch the successor before the node is released. */
			struct block_allocation* pn = p->next;
			free_alloc(p);
			p = pn;
		}
	}

	printf("Created filesystem with %d/%d inodes and %d/%d blocks\n",
			aux_info.sb->s_inodes_count - aux_info.sb->s_free_inodes_count,
			aux_info.sb->s_inodes_count,
			aux_info.sb->s_blocks_count_lo - aux_info.sb->s_free_blocks_count_lo,
			aux_info.sb->s_blocks_count_lo);

	if (wipe && WIPE_IS_SUPPORTED) {
		wipe_block_device(fd, info.len);
	}

	write_ext4_image(fd, gzip, sparse, crc);

	sparse_file_destroy(ext4_sparse_file);
	ext4_sparse_file = NULL;

	free(mountpoint);
	free(directory);
	free(target_out_directory);

	return 0;
}