/*
 * Remove the entry called name_to_unlink from the index's search tree.
 *
 * Returns NULL (after reporting through fferror_print) when the index has no
 * tree; otherwise returns the index that was passed in. The result of
 * tdelete() is not inspected, so the index is returned whether or not a
 * matching entry was present.
 */
ffindex_index_t* ffindex_tree_unlink(ffindex_index_t* index, char* name_to_unlink)
{
  ffindex_entry_t key;

  if(!index->tree_root)
  {
    fferror_print(__FILE__, __LINE__, __func__, "tree is NULL");
    return NULL;
  }

  /* Only the name field of the probe entry is filled in. */
  strncpy(key.name, name_to_unlink, FFINDEX_MAX_ENTRY_NAME_LENTH);

  tdelete(&key, &index->tree_root, ffindex_compare_entries_by_name);

  return index;
}
/* Insert all files from directory into ffindex
 *
 * Every regular file in input_dir_name whose name does not start with '.' is
 * appended through ffindex_insert_file(), using the bare file name as the
 * entry name.
 *
 * data_file, index_file: output streams, passed on to ffindex_insert_file().
 * start_offset:          in: current offset into the data file;
 *                        out: offset after the last inserted file.
 * input_dir_name:        directory to scan (not recursive).
 *
 * Returns 0 after the scan, or -1 when the directory cannot be opened or its
 * name does not fit into a PATH_MAX buffer. Files that cannot be stat()ed or
 * whose full path is too long are reported and skipped.
 */
int ffindex_insert_dir(FILE *data_file, FILE *index_file, size_t *start_offset, char *input_dir_name)
{
  DIR *dir = opendir(input_dir_name);
  if(dir == NULL)
  {
    fferror_print(__FILE__, __LINE__, __func__, input_dir_name);
    return -1;
  }

  /* Build the "<dir>/" prefix once; entry names are written right after it. */
  char path[PATH_MAX];
  size_t input_dir_name_len = strlen(input_dir_name);
  int needs_slash = (input_dir_name_len > 0 && input_dir_name[input_dir_name_len - 1] != '/');
  int prefix_len = snprintf(path, sizeof(path), "%s%s", input_dir_name, needs_slash ? "/" : "");
  if(prefix_len < 0 || (size_t)prefix_len >= sizeof(path))
  {
    fferror_print(__FILE__, __LINE__, __func__, input_dir_name);
    closedir(dir);
    return -1;
  }
  size_t name_room = sizeof(path) - (size_t)prefix_len;

  size_t offset = *start_offset;
  struct dirent *entry;
  while((entry = readdir(dir)) != NULL)
  {
    /* Skips ".", ".." and hidden files alike. */
    if(entry->d_name[0] == '.')
      continue;

    int name_len = snprintf(path + prefix_len, name_room, "%s", entry->d_name);
    if(name_len < 0 || (size_t)name_len >= name_room)
    {
      fferror_print(__FILE__, __LINE__, __func__, entry->d_name);
      continue;
    }

    struct stat sb;
    if(stat(path, &sb) == -1)
    {
      /* sb is not filled in on failure, so it must not be inspected. */
      fferror_print(__FILE__, __LINE__, __func__, path);
      continue;
    }

    if(!S_ISREG(sb.st_mode))
      continue;

    ffindex_insert_file(data_file, index_file, &offset, path, entry->d_name);
  }
  closedir(dir);

  /* update return value */
  *start_offset = offset;

  return 0;
}
/* Insert one file by handle into ffindex
 *
 * Copies everything readable from `file` to `data_file`, appends a single
 * '\0' separator byte, and then writes one index record of the form
 * "name<TAB>offset<TAB>length<NL>" to `index_file`. The recorded length
 * includes the separator byte.
 *
 * offset: in: position in the data file at which this record starts;
 *         out: advanced by the number of bytes actually written.
 *
 * Returns 0 on success. On a read, write or index-write error the problem is
 * reported through fferror_print, no index record is claimed as written, and
 * -1 is returned.
 */
int ffindex_insert_filestream(FILE *data_file, FILE *index_file, size_t *offset, FILE* file, char *name)
{
  /* copy and paste file to data file */
  char buffer[FFINDEX_BUFFER_SIZE];
  size_t offset_before = *offset;
  size_t read_size;
  while((read_size = fread(buffer, sizeof(char), sizeof(buffer), file)) > 0)
  {
    size_t write_size = fwrite(buffer, sizeof(char), read_size, data_file);
    *offset += write_size;
    if(read_size != write_size)
    {
      /* A short write means the record is already broken; stop copying. */
      fferror_print(__FILE__, __LINE__, __func__, name);
      goto EXCEPTION_ffindex_insert_file;
    }
  }

  /* Seperate by '\0' and thus also make sure at least one byte is written */
  buffer[0] = '\0';
  if(fwrite(buffer, sizeof(char), 1, data_file) == 1)
    *offset += 1;

  /* Do not index a record whose data could not be read or written fully. */
  if(ferror(data_file) != 0 || ferror(file) != 0)
    goto EXCEPTION_ffindex_insert_file;

  /* write index entry */
  if(fprintf(index_file, "%s\t%zu\t%zu\n", name, offset_before, *offset - offset_before) < 0)
    goto EXCEPTION_ffindex_insert_file;

  return 0;

EXCEPTION_ffindex_insert_file:
  fferror_print(__FILE__, __LINE__, __func__, "");
  return -1;
}
ffindex_index_t* ffindex_index_parse(FILE *index_file, size_t num_max_entries) { if(num_max_entries == 0) num_max_entries = FFINDEX_MAX_INDEX_ENTRIES_DEFAULT; size_t nbytes = sizeof(ffindex_index_t) + (sizeof(ffindex_entry_t) * num_max_entries); ffindex_index_t *index = (ffindex_index_t *)malloc(nbytes); index->num_max_entries = num_max_entries; if(index == NULL) { fferror_print(__FILE__, __LINE__, __func__, "malloc failed"); return NULL; } index->file = index_file; index->index_data = ffindex_mmap_data(index_file, &(index->index_data_size)); index->type = SORTED_ARRAY; /* Assume a sorted file for now */ int i = 0; char* d = index->index_data; char* end; /* Faster than scanf per line */ for(i = 0; d < (index->index_data + index->index_data_size); i++) { int p; for(p = 0; *d != '\t'; d++) index->entries[i].name[p++] = *d; index->entries[i].name[p] = '\0'; index->entries[i].offset = strtol(d, &end, 10); d = end; index->entries[i].length = strtol(d, &end, 10); d = end + 1; /* +1 for newline */ } index->n_entries = i; if(index->n_entries == 0) return NULL; return index; }
int main(int argn, char **argv) { int mpi_error, mpi_rank, mpi_num_procs; mpi_error = MPI_Init(&argn, &argv); mpi_error = MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); mpi_error = MPI_Comm_size(MPI_COMM_WORLD, &mpi_num_procs); int opt; char *data_filename_out = NULL, *index_filename_out = NULL; while ((opt = getopt(argn, argv, "d:i:")) != -1) { switch (opt) { case 'd': data_filename_out = optarg; break; case 'i': index_filename_out = optarg; break; } } if(argn - optind < 3) { fprintf(stderr, "Not enough arguments %d.\n", optind - argn); fprintf(stderr, "USAGE: %s -d DATA_FILENAME_OUT -i INDEX_FILENAME_OUT DATA_FILENAME INDEX_FILENAME -- PROGRAM [PROGRAM_ARGS]*\n" "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n", basename(argv[0])); return -1; } read_buffer = malloc(400 * 1024 * 1024); char *data_filename = argv[optind++]; char *index_filename = argv[optind++]; char *program_name = argv[optind]; char **program_argv = argv + optind; FILE *data_file = fopen(data_filename, "r"); FILE *index_file = fopen(index_filename, "r"); if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename); exit(EXIT_FAILURE); } if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename); exit(EXIT_FAILURE); } FILE *data_file_out = NULL, *index_file_out = NULL; // Setup one output FFindex for each MPI process if(data_filename_out != NULL && index_filename_out != NULL) { char* data_filename_out_rank = malloc(FILENAME_MAX); char* index_filename_out_rank = malloc(FILENAME_MAX); snprintf( data_filename_out_rank, FILENAME_MAX, "%s.%d", data_filename_out, mpi_rank); snprintf(index_filename_out_rank, FILENAME_MAX, "%s.%d", index_filename_out, mpi_rank); data_file_out = fopen(data_filename_out_rank, "w+"); index_file_out = fopen(index_filename_out_rank, "w+"); if( data_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename_out); exit(EXIT_FAILURE); } if(index_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], 
index_filename_out); exit(EXIT_FAILURE); } } int capture_stdout = (data_file_out != NULL); size_t data_size; char *data = ffindex_mmap_data(data_file, &data_size); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename); MPI_Finalize(); exit(EXIT_FAILURE); } // Ignore SIGPIPE struct sigaction handler; handler.sa_handler = SIG_IGN; sigemptyset(&handler.sa_mask); handler.sa_flags = 0; sigaction(SIGPIPE, &handler, NULL); size_t batch_size, range_start, range_end; if(index->n_entries >= mpi_num_procs) batch_size = index->n_entries / mpi_num_procs; else batch_size = 0; range_start = mpi_rank * batch_size; range_end = range_start + batch_size; size_t offset = 0; // Foreach entry if(batch_size > 0) for(size_t entry_index = range_start; entry_index < range_end; entry_index++) { ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index); if(entry == NULL) { perror(entry->name); return errno; } int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset); if(error != 0) { perror(entry->name); break; } } ssize_t left_over = index->n_entries - (batch_size * mpi_num_procs); if(mpi_rank < left_over) { size_t left_over_entry_index = (batch_size * mpi_num_procs) + mpi_rank; ffindex_entry_t* entry = ffindex_get_entry_by_index(index, left_over_entry_index); if(entry == NULL) { perror(entry->name); return errno; } //fprintf(stderr, "handling left over: %ld\n", left_over_entry_index); int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset); if(error != 0) perror(entry->name); } if(capture_stdout) fclose(data_file_out); if(index_file_out != NULL) fclose(index_file_out); MPI_Barrier(MPI_COMM_WORLD); // merge FFindexes in master if(data_filename_out != NULL && mpi_rank == 0) { char* merge_command = malloc(FILENAME_MAX * 5); for(int i = 0; i < 
mpi_num_procs; i++) { snprintf( merge_command, FILENAME_MAX, "ffindex_build -as %s %s -d %s.%d -i %s.%d", data_filename_out, index_filename_out, data_filename_out, i, index_filename_out, i); //puts(merge_command); system(merge_command); } } MPI_Finalize(); return EXIT_SUCCESS; }
int main(int argn, char **argv) { int sort = 0, unlink = 0, version = 0, use_tree = 1; int err = EXIT_SUCCESS; char* list_filenames[MAX_FILENAME_LIST_FILES]; size_t list_filenames_index = 0; static struct option long_options[] = { { "file", required_argument, NULL, 'f' }, { "sort", no_argument, NULL, 's' }, { "tree", no_argument, NULL, 't' }, { "unlink", no_argument, NULL, 'u' }, { "version", no_argument, NULL, 'v' }, { NULL, 0, NULL, 0 } }; int opt; while (1) { int option_index = 0; opt = getopt_long(argn, argv, "stuvf:", long_options, &option_index); if (opt == -1) break; switch (opt) { case 'f': list_filenames[list_filenames_index++] = optarg; break; case 's': sort = 1; break; case 't': use_tree = 1; break; case 'u': unlink = 1; break; case 'v': version = 1; break; default: usage(argv[0]); return EXIT_FAILURE; } } if(version == 1) { /* Don't you dare running it on a platform where byte != 8 bits */ printf("%s version %.2f, off_t = %zd bits\n", argv[0], FFINDEX_VERSION, sizeof(off_t) * 8); return EXIT_SUCCESS; } if(optind >= argn) { usage(argv[0]); return EXIT_FAILURE; } char *index_filename = argv[optind++]; FILE *index_file; index_file = fopen(index_filename, "r+"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { perror("ffindex_index_parse failed"); return (EXIT_FAILURE); } fclose(index_file); /* Unlink entries */ if(unlink) { if(use_tree) { /* Build tree */ index = ffindex_index_as_tree(index); /* For each list_file unlink all entries */ if(list_filenames_index > 0) for(int i = 0; i < list_filenames_index; i++) { printf("Unlinking entries from '%s'\n", list_filenames[i]); FILE *list_file = fopen(list_filenames[i], "r"); if( list_file == NULL) { perror(list_filenames[i]); return EXIT_FAILURE; } /* unlink entries in file, one per line */ char path[PATH_MAX]; while(fgets(path, PATH_MAX, list_file) != NULL) index = ffindex_unlink(index, ffnchomp(path, 
strlen(path))); } /* unlink entries specified by args */ for(int i = optind; i < argn; i++) index = ffindex_unlink(index, argv[i]); } else { char** sorted_names_to_unlink = malloc(FFINDEX_MAX_INDEX_ENTRIES_DEFAULT * sizeof(char *)); if(sorted_names_to_unlink == NULL) fferror_print(__FILE__, __LINE__, __func__, "malloc failed"); /* For each list_file unlink all entries */ if(list_filenames_index > 0) for(int i = 0; i < list_filenames_index; i++) { printf("Unlinking entries from '%s'\n", list_filenames[i]); FILE *list_file = fopen(list_filenames[i], "r"); if( list_file == NULL) { perror(list_filenames[i]); return EXIT_FAILURE; } /* unlink entries in file, one per line */ char path[PATH_MAX]; while(fgets(path, PATH_MAX, list_file) != NULL) sorted_names_to_unlink[i++] = ffnchomp(strdup(path), strlen(path)); ffindex_unlink_entries(index, sorted_names_to_unlink, i); } /* unlink entries specified by args */ int y = 0; for(int i = optind; i < argn; i++, y++) sorted_names_to_unlink[y] = argv[i]; ffindex_unlink_entries(index, sorted_names_to_unlink, y); /* Sort the index entries and write back */ if(sort) { ffindex_sort_index_file(index); index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); } } } /* Write index back */ index_file = fopen(index_filename, "w"); if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; } err += ffindex_write(index, index_file); return err; }
int main(int argn, char **argv) { int by_index = 0; static struct option long_options[] = { { "byindex", no_argument, NULL, 'n' }, { NULL, 0, NULL, 0 } }; int opt; while (1) { int option_index = 0; opt = getopt_long(argn, argv, "n", long_options, &option_index); if (opt == -1) break; switch (opt) { case 'n': by_index = 1; break; default: usage(argv[0]); return EXIT_FAILURE; } } if(argn < 3) { usage(argv[0]); return EXIT_FAILURE; } char *data_filename = argv[optind++]; char *index_filename = argv[optind++]; FILE *data_file = fopen(data_filename, "r"); FILE *index_file = fopen(index_filename, "r"); if( data_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", data_filename); exit(EXIT_FAILURE); } if(index_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", index_filename); exit(EXIT_FAILURE); } size_t data_size; char *data = ffindex_mmap_data(data_file, &data_size); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename); exit(EXIT_FAILURE); } if(by_index) { for(int i = optind; i < argn; i++) { size_t index_n = atol(argv[i]) - 1; // offset from 0 but specify from 1 ffindex_entry_t* entry = ffindex_get_entry_by_index(index, index_n); if(entry == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]); } else { char *filedata = ffindex_get_data_by_entry(data, entry); if(filedata == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]); } else fwrite(filedata, entry->length - 1, 1, stdout); } } } else // by name { for(int i = optind; i < argn; i++) { char *filename = argv[i]; ffindex_entry_t* entry = ffindex_get_entry_by_name(index, filename); if(entry == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename); } else { char *filedata = ffindex_get_data_by_entry(data, entry); if(filedata == NULL) { 
errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename); } else fwrite(filedata, entry->length - 1, 1, stdout); } } /* Alternative code using (slower) ffindex_fopen */ /* FILE *file = ffindex_fopen(data, index, filename); if(file == NULL) { errno = ENOENT; fferror_print(__FILE__, __LINE__, "ffindex_fopen file not found in index", filename); } else { char line[LINE_MAX]; while(fgets(line, LINE_MAX, file) != NULL) printf("%s", line); } */ } return 0; }
int main(int argn, char **argv) { if(argn < 4) { fprintf(stderr, "USAGE: %s DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*\n" "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n", argv[0]); return -1; } char *data_filename = argv[1]; char *index_filename = argv[2]; char *program_name = argv[3]; char **program_argv = argv + 3; FILE *data_file = fopen(data_filename, "r"); FILE *index_file = fopen(index_filename, "r"); if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename); exit(EXIT_FAILURE); } if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename); exit(EXIT_FAILURE); } size_t data_size; char *data = ffindex_mmap_data(data_file, &data_size); ffindex_index_t* index = ffindex_index_parse(index_file, 0); if(index == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename); exit(EXIT_FAILURE); } // Ignore SIGPIPE struct sigaction handler; handler.sa_handler = SIG_IGN; sigemptyset(&handler.sa_mask); handler.sa_flags = 0; sigaction(SIGPIPE, &handler, NULL); size_t range_start = 0; size_t range_end = index->n_entries; // Foreach entry //#pragma omp parallel for for(size_t entry_index = range_start; entry_index < range_end; entry_index++) { //fprintf(stderr, "index %ld\n", entry_index); int ret = 0; ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index); if(entry == NULL) { perror(entry->name); continue; } int pipefd[2]; ret = pipe(pipefd); if(ret != 0) { perror(entry->name); continue; } pid_t child_pid = fork(); if(child_pid == 0) { fclose(data_file); fclose(index_file); close(pipefd[1]); // Make pipe from parent our new stdin int newfd = dup2(pipefd[0], fileno(stdin)); if(newfd < 0) { fprintf(stdout, "%d %d\n", pipefd[0], newfd); perror(entry->name); } close(pipefd[0]); // exec program with the pipe as stdin execvp(program_name, program_argv); // never reached } else if(child_pid > 0) { // Read end is for child only close(pipefd[0]); // Write file data to 
child's stdin. char *filedata = ffindex_get_data_by_entry(data, entry); ssize_t written = 0; while(written < entry->length) { int w = write(pipefd[1], filedata + written, entry->length - written); if(w < 0 && errno != EPIPE) { perror(entry->name); break; } else if(w == 0 && errno != 0) { perror(entry->name); break; } else written += w; } close(pipefd[1]); // child gets EOF waitpid(child_pid, NULL, 0); } else { perror(entry->name); exit(errno); } } return 0; }