static int combine_files (enum comb_command_type command, struct lexer *lexer, struct dataset *ds) { struct comb_proc proc; bool saw_by = false; bool saw_sort = false; struct casereader *active_file = NULL; char *first_name = NULL; char *last_name = NULL; struct taint *taint = NULL; size_t n_tables = 0; size_t allocated_files = 0; size_t i; proc.files = NULL; proc.n_files = 0; proc.dict = dict_create (get_default_encoding ()); proc.output = NULL; proc.matcher = NULL; subcase_init_empty (&proc.by_vars); proc.first = NULL; proc.last = NULL; proc.buffered_case = NULL; proc.prev_BY = NULL; dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds))); lex_match (lexer, T_SLASH); for (;;) { struct comb_file *file; enum comb_file_type type; if (lex_match_id (lexer, "FILE")) type = COMB_FILE; else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE")) { type = COMB_TABLE; n_tables++; } else break; lex_match (lexer, T_EQUALS); if (proc.n_files >= allocated_files) proc.files = x2nrealloc (proc.files, &allocated_files, sizeof *proc.files); file = &proc.files[proc.n_files++]; file->type = type; subcase_init_empty (&file->by_vars); subcase_init_empty (&file->src); subcase_init_empty (&file->dst); file->mv = NULL; file->handle = NULL; file->dict = NULL; file->reader = NULL; file->data = NULL; file->is_sorted = true; file->in_name = NULL; file->in_var = NULL; if (lex_match (lexer, T_ASTERISK)) { if (!dataset_has_source (ds)) { msg (SE, _("Cannot specify the active dataset since none " "has been defined.")); goto error; } if (proc_make_temporary_transformations_permanent (ds)) msg (SE, _("This command may not be used after TEMPORARY when " "the active dataset is an input source. " "Temporary transformations will be made permanent.")); file->dict = dict_clone (dataset_dict (ds)); } else { file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); if (file->handle == NULL) goto error; file->reader = any_reader_open (file->handle, NULL, &file->dict); if (file->reader == NULL) goto error; } while (lex_match (lexer, T_SLASH)) if (lex_match_id (lexer, "RENAME")) { if (!parse_dict_rename (lexer, file->dict)) goto error; } else if (lex_match_id (lexer, "IN")) { lex_match (lexer, T_EQUALS); if (lex_token (lexer) != T_ID) { lex_error (lexer, NULL); goto error; } if (file->in_name) { msg (SE, _("Multiple IN subcommands for a single FILE or " "TABLE.")); goto error; } file->in_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "SORT")) { file->is_sorted = false; saw_sort = true; } if (!merge_dictionary (proc.dict, file)) goto error; } while (lex_token (lexer) != T_ENDCMD) { if (lex_match (lexer, T_BY)) { const struct variable **by_vars; size_t i; bool ok; if (saw_by) { lex_sbc_only_once ("BY"); goto error; } saw_by = true; lex_match (lexer, T_EQUALS); if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars, &by_vars, NULL)) goto error; ok = true; for (i = 0; i < proc.n_files; i++) { struct comb_file *file = &proc.files[i]; size_t j; for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++) { const char *name = var_get_name (by_vars[j]); struct variable *var = dict_lookup_var (file->dict, name); if (var != NULL) subcase_add_var (&file->by_vars, var, subcase_get_direction (&proc.by_vars, j)); else { if (file->handle != NULL) msg (SE, _("File %s lacks BY variable %s."), fh_get_name (file->handle), name); else msg (SE, _("Active dataset lacks BY variable %s."), name); ok = false; } } assert (!ok || subcase_conformable (&file->by_vars, &proc.files[0].by_vars)); } free (by_vars); if (!ok) goto error; } else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST")) { if (first_name != NULL) { lex_sbc_only_once ("FIRST"); goto error; } lex_match (lexer, T_EQUALS); if (!lex_force_id (lexer)) goto error; first_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST")) { if (last_name != NULL) { lex_sbc_only_once ("LAST"); goto error; } lex_match (lexer, T_EQUALS); if (!lex_force_id (lexer)) goto error; last_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "MAP")) { /* FIXME. */ } else if (lex_match_id (lexer, "DROP")) { if (!parse_dict_drop (lexer, proc.dict)) goto error; } else if (lex_match_id (lexer, "KEEP")) { if (!parse_dict_keep (lexer, proc.dict)) goto error; } else { lex_error (lexer, NULL); goto error; } if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD) { lex_end_of_command (lexer); goto error; } } if (!saw_by) { if (command == COMB_UPDATE) { lex_sbc_missing ("BY"); goto error; } if (n_tables) { msg (SE, _("BY is required when %s is specified."), "TABLE"); goto error; } if (saw_sort) { msg (SE, _("BY is required when %s is specified."), "SORT"); goto error; } } /* Add IN, FIRST, and LAST variables to master dictionary. */ for (i = 0; i < proc.n_files; i++) { struct comb_file *file = &proc.files[i]; if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var)) goto error; } if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first) || !create_flag_var ("LAST", last_name, proc.dict, &proc.last)) goto error; dict_delete_scratch_vars (proc.dict); dict_compact_values (proc.dict); /* Set up mapping from each file's variables to master variables. */ for (i = 0; i < proc.n_files; i++) { struct comb_file *file = &proc.files[i]; size_t src_var_cnt = dict_get_var_cnt (file->dict); size_t j; file->mv = xnmalloc (src_var_cnt, sizeof *file->mv); for (j = 0; j < src_var_cnt; j++) { struct variable *src_var = dict_get_var (file->dict, j); struct variable *dst_var = dict_lookup_var (proc.dict, var_get_name (src_var)); if (dst_var != NULL) { size_t n = subcase_get_n_fields (&file->src); file->mv[n] = var_get_missing_values (src_var); subcase_add_var (&file->src, src_var, SC_ASCEND); subcase_add_var (&file->dst, dst_var, SC_ASCEND); } } } proc.output = autopaging_writer_create (dict_get_proto (proc.dict)); taint = taint_clone (casewriter_get_taint (proc.output)); /* Set up case matcher. */ proc.matcher = case_matcher_create (); for (i = 0; i < proc.n_files; i++) { struct comb_file *file = &proc.files[i]; if (file->reader == NULL) { if (active_file == NULL) { proc_discard_output (ds); file->reader = active_file = proc_open_filtering (ds, false); } else file->reader = casereader_clone (active_file); } if (!file->is_sorted) file->reader = sort_execute (file->reader, &file->by_vars); taint_propagate (casereader_get_taint (file->reader), taint); file->data = casereader_read (file->reader); if (file->type == COMB_FILE) case_matcher_add_input (proc.matcher, &file->by_vars, &file->data, &file->is_minimal); } if (command == COMB_ADD) execute_add_files (&proc); else if (command == COMB_MATCH) execute_match_files (&proc); else if (command == COMB_UPDATE) execute_update (&proc); else NOT_REACHED (); case_matcher_destroy (proc.matcher); proc.matcher = NULL; close_all_comb_files (&proc); if (active_file != NULL) proc_commit (ds); dataset_set_dict (ds, proc.dict); dataset_set_source (ds, casewriter_make_reader (proc.output)); proc.dict = NULL; proc.output = NULL; free_comb_proc (&proc); free (first_name); free (last_name); return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: if (active_file != NULL) proc_commit (ds); free_comb_proc (&proc); taint_destroy (taint); free (first_name); free (last_name); return CMD_CASCADING_FAILURE; }
/* MAIN */ int main( int argc, char **argv ) { size_t results[] = {4255, 142803, 46838, 0}, i = 0, output = 0; const size_t b = 5; const char *count_bytes_files[] = { "/data/orig_breaks.txt", "/data/spawner_output.txt", "/data/zh_romance_of_three_kingdoms.txt", ""}, *source_dir = getenv("SOURCE"); char buffer[BUFSIZ], tinybuffer[b], *locale = "root", *encoding = "UTF-8"; OChar obuffer[BUFSIZ], otinybuffer[b]; struct stat sts; time_t now; FILE *f; init_oly(argv[0], TEST_PKGDATADIR, encoding, locale); if (source_dir == NULL) { fprintf(stderr, "requires SOURCE environment variable, supplied by runtest. Exiting...\n"); exit(EXIT_FAILURE); } plan(16); diag("----- Testing oly_timestamp function. -----"); time(&now); is_double(((double)( now*1000)), oly_timestamp(), 1000.0, "Two times should be close enough."); diag("----- Testing count_file_bytes function. -----"); for (i = 0; (results[i] != 0); i++) { strcpy(buffer, source_dir); strcat(buffer, count_bytes_files[i]); if (stat(buffer, &sts) == -1 && errno == ENOENT) { printf ("The file %s doesn't exist...\n", buffer); } f = fopen(buffer, "r"); assert( count_file_bytes( f, &output ) == OLY_OKAY ) ; is_int(results[i], output, "File: %s", count_bytes_files[i]); fclose(f); } diag("----- Testing get_default_locale and get_default_encoding function. -----"); is_unicode_string( u"root", get_default_locale(), "For this test, default locale should be root."); is_unicode_string( u"UTF-8", get_default_encoding(), "For this test, default encoding should be UTF-8."); diag("----- Testing char_default_locale and char_default_encoding function. -----"); is_string( "root", char_default_locale(), "For this test, default locale should be root."); is_string( "UTF-8", char_default_encoding(), "For this test, default encoding should be UTF-8."); diag("----- ostr_to_cstr and cstr_to_ostr -----"); is_unicode_string( u"Lorum ipsum etc...", cstr_to_ostr( obuffer, BUFSIZ, "Lorum ipsum etc..."), "Char to ochar with space, BUFSIZ = %i", BUFSIZ); is_unicode_string( u"Loru", cstr_to_ostr( otinybuffer, b, "Lorum ipsum etc..." ), "Char to ochar with tiny buffer of %i characters.", (int)b ); is_unicode_string( u"", cstr_to_ostr( otinybuffer, b, "" ), "Char to ochar With empty string" ); is_unicode_string( u"", cstr_to_ostr( otinybuffer, b, NULL ), "Char to ochar from null" ); is_string( "Back the other way!", ostr_to_cstr( buffer, BUFSIZ, u"Back the other way!" ), "OChar to char, BUFSIZ = %i", BUFSIZ ); is_string( "five", ostr_to_cstr( tinybuffer, b, u"five characters is all that fits." ), "OChar to char, buffer of %i OChars", (int)b ); is_string( "", ostr_to_cstr( tinybuffer, b, NULL ), "Char to ochar a NULL from string." ); is_string( "", ostr_to_cstr( tinybuffer, b, u"" ), "Char to ochar with an empty string." ); exit(EXIT_SUCCESS); }