/*
 * dxsyn_init - build a DictSyn from a list of dictionary options.
 *
 * Argument 0 is a List of DefElem options.  A zero-filled DictSyn is
 * allocated with palloc0() and keeporig defaults to true.  Options are
 * matched case-insensitively by name:
 *   KEEPORIG - boolean, stored in d->keeporig
 *   RULES    - string, passed to read_dictionary(d, ...)
 * Any other option name raises an ERROR with
 * ERRCODE_INVALID_PARAMETER_VALUE.
 *
 * NOTE(review): the definition is cut off after the option loop in this
 * view; the return statement and closing brace are not visible here.
 */
Datum dxsyn_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; d = (DictSyn *) palloc0(sizeof(DictSyn)); d->len = 0; d->syn = NULL; d->keeporig = true; foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0) { d->keeporig = defGetBoolean(defel); } else if (pg_strcasecmp(defel->defname, "RULES") == 0) { read_dictionary(d, defGetString(defel)); } else { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized xsyn parameter: \"%s\"", defel->defname))); } }
cell * prepare_dictionary(int *argcp, char *(*argvp[])) { u_char *origin; u_char *here; u_char *xlimit; int dict_size; u_char *extension; char *dictionary_file = ""; // Allocate space for the Forth dictionary and read its initial contents origin = aln_alloc(MAXDICT, variables); xlimit = &origin[MAXDICT]; if(*argcp < 2 || (extension = strrchr((*argvp)[1],'.')) == NULL || strcmp(extension, ".dic") != 0 ) { dictionary_file = is_readable("app.dic") ? "app.dic" : DEFAULT_EXE; } else { dictionary_file = (*argvp)[1]; *argcp -= 1; *argvp += 1; } dict_size = read_dictionary(dictionary_file, origin, variables); here = &origin[dict_size]; init_compiler(origin, xlimit, 0xfffe, here, xlimit, variables); return variables; }
int main(){ //default initializations initializeStopwords(); std::unordered_set<std::string> suffixes = getDefaultSuffixes(); //everything else std::vector<std::string> rf = relevantFiles("examples",suffixes,false);//home/max/workspace/thtag/ //printStringVector(rf); gramDict mainDict; std::vector<gramDict> dictVec; //read all files in example subdirectory and clean each dictionary and merge them for (std::vector<std::string>::const_iterator current = rf.begin(); current != rf.end();++current) { std::cout<<(*current)<<'\n'; gramDict tempDict; long fileSize = getFileSize((*current)); std::cout<<"FILE SIZE: "<<fileSize<<'\n'; readCall((*current),&tempDict,-3,60); basicNLP(&tempDict); trimToTopN(&tempDict,(int) std::max(2*log(fileSize+500), 6.00)); merge_dictionaries(&mainDict,&tempDict); dictVec.push_back(tempDict); } read_dictionary(&mainDict); outputDictionary(&mainDict,"testDictOutput.txt"); //create map from files to keywords convertCountToIndex(&mainDict); writeKeywordDataFile(&dictVec,&mainDict,"keywordMap.txt"); outputFileList(&rf,"fileList.txt"); std::cout<<"DONE!\n"; }
/*
 * pl_read_dictionary(+FilePath)
 *
 * Foreign predicate: converts FilePath to text and loads the word list
 * from that file with read_dictionary().
 *
 * Succeeds when read_dictionary() returns 0 and fails otherwise.  If the
 * term cannot be converted to text, a warning is printed and the
 * predicate fails.
 *
 * The conversion result is now checked: previously a failed
 * PL_get_chars() left `filename` uninitialised and it was still printed
 * and passed on.  The earlier PL_is_string() guard was inverted (it
 * rejected string terms, the very input the warning asks for), so the
 * result of the conversion itself is used as the validity test.
 */
foreign_t pl_read_dictionary(term_t filepath_term)
{
    char *filename = NULL;

    if (!PL_get_chars(filepath_term, &filename, CVT_ALL | BUF_DISCARDABLE))
        return PL_warning("please input a valid string");

    printf("reading the file for list of words %s\n", filename);

    if (read_dictionary(filename) == 0)
        PL_succeed;
    else
        PL_fail;
}
/**
 * Create a Dictionary whose entries are parsed from the in-memory string
 * "input" by read_dictionary(); the affix table, the regex file and the
 * two post-processing knowledge files are opened by name.
 *
 * Outline:
 *  - dict->lang is the part of "lang" after its last '/' (or all of it),
 *    interned in the dictionary's string set; dict->name is dict_name.
 *  - A dict_name containing "affix" selects the affix-dictionary setup
 *    (load_affix, class table); anything else gets the regular lookup
 *    callbacks and a spell checker.
 *  - When affix_name is NULL the function returns right after the
 *    entries have been read.
 *  - Otherwise the locale is set up, the affix table is loaded through
 *    dictionary_six(), and the regexes are compiled with LC_CTYPE
 *    temporarily switched to dict->locale and restored afterwards.
 *
 * Returns the new dictionary, or NULL on failure.  The failure path
 * frees the string set, the affix table's struct memory (if any) and the
 * dictionary struct itself.
 */
static Dictionary dictionary_six_str(const char * lang, const char * input, const char * dict_name, const char * pp_name, const char * cons_name, const char * affix_name, const char * regex_name) { const char * t; Dictionary dict; Dict_node *dict_node; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); memset(dict, 0, sizeof(struct Dictionary_s)); /* Language and file-name stuff */ dict->string_set = string_set_create(); t = strrchr (lang, '/'); t = (NULL == t) ? lang : t+1; dict->lang = string_set_add(t, dict->string_set); lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang); dict->name = string_set_add(dict_name, dict->string_set); /* * A special setup per dictionary type. The check here assumes the affix * dictionary name contains "affix". FIXME: For not using this * assumption, the dictionary creating stuff needs a rearrangement. */ if (0 == strstr(dict->name, "affix")) { /* To disable spell-checking, just set the checker to NULL */ dict->spell_checker = spellcheck_create(dict->lang); #if defined HAVE_HUNSPELL || defined HAVE_ASPELL /* TODO: * 1. Set the spell option to 0, to signify no spell checking is done. * 2. On verbosity >= 1, add a detailed message on the reason. */ if (NULL == dict->spell_checker) prt_error("Info: Spell checker disabled."); #endif dict->insert_entry = insert_list; dict->lookup_list = lookup_list; dict->free_lookup = free_llist; dict->lookup = boolean_lookup; } else { /* * Affix dictionary. */ size_t i; dict->insert_entry = load_affix; dict->lookup = return_true; /* initialize the class table */ dict->afdict_class = malloc(sizeof(*dict->afdict_class) * ARRAY_SIZE(afdict_classname)); for (i = 0; i < ARRAY_SIZE(afdict_classname); i++) { dict->afdict_class[i].mem_elems = 0; dict->afdict_class[i].length = 0; dict->afdict_class[i].string = NULL; } } dict->affix_table = NULL; /* Read dictionary from the input string. 
*/ dict->input = input; dict->pin = dict->input; if (!read_dictionary(dict)) { dict->pin = NULL; dict->input = NULL; goto failure; } dict->pin = NULL; dict->input = NULL; if (NULL == affix_name) { /* * The affix table is handled alone in this invocation. * Skip the rest of processing! * FIXME: The dictionary creating stuff needs a rearrangement. */ return dict; } /* If we don't have a locale per dictionary, the following * will also set the program's locale. */ dict->locale = linkgrammar_get_dict_locale(dict); set_utf8_program_locale(); #ifdef HAVE_LOCALE_T /* We have a locale per dictionary. */ if (NULL != dict->locale) dict->locale_t = newlocale_LC_CTYPE(dict->locale); /* If we didn't succeed to set the dictionary locale, the program will * SEGFAULT when it tries to use it with the isw*() functions. * So set it to the current program's locale as a last resort. */ if (NULL == dict->locale) { dict->locale = setlocale(LC_CTYPE, NULL); dict->locale_t = newlocale_LC_CTYPE(setlocale(LC_CTYPE, NULL)); prt_error("Warning: Couldn't set dictionary locale! " "Using current program locale %s", dict->locale); } /* If dict->locale is still not set, there is a bug. */ assert((locale_t)0 != dict->locale_t, "Dictionary locale is not set."); #else /* We don't have a locale per dictionary - but anyway make sure * dict->locale is consistent with the current program's locale, * and especially that it is not NULL. It still indicates the intended * locale of this dictionary and the locale of the compiled regexs. */ dict->locale = setlocale(LC_CTYPE, NULL); #endif /* HAVE_LOCALE_T */ dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL); if (dict->affix_table == NULL) { prt_error("Error: Could not open affix file %s", affix_name); goto failure; } if (! afdict_init(dict)) goto failure; /* * Process the regex file. * We have to compile regexs using the dictionary locale, * so make a temporary locale swap. 
*/ if (read_regex_file(dict, regex_name)) goto failure; const char *locale = setlocale(LC_CTYPE, NULL); locale = strdupa(locale); /* setlocale() uses static memory. */ setlocale(LC_CTYPE, dict->locale); lgdebug(+D_DICT, "Regexs locale %s\n", setlocale(LC_CTYPE, NULL)); if (compile_regexs(dict->regex_root, dict)) { locale = setlocale(LC_CTYPE, locale); goto failure; } locale = setlocale(LC_CTYPE, locale); assert(NULL != locale, "Cannot restore program locale\n"); #ifdef USE_CORPUS dict->corpus = lg_corpus_new(); #endif dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK); dict->base_knowledge = pp_knowledge_open(pp_name); dict->hpsg_knowledge = pp_knowledge_open(cons_name); dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = true; dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD); if (dict_node != NULL) dict->unlimited_connector_set = connector_set_create(dict_node->exp); free_lookup(dict_node); return dict; failure: string_set_delete(dict->string_set); if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s)); xfree(dict, sizeof(struct Dictionary_s)); return NULL; }
/* The following function is dictionary_create with an extra paramater called "path". If this is non-null, then the path used to find the file is taken from that path. Otherwise the path is taken from the dict_name. This is only needed because an affix_file is opened by a recursive call to this function. */ static Dictionary internal_dictionary_create(char * dict_name, char * pp_name, char * cons_name, char * affix_name, char * path) { Dictionary dict; static int rand_table_inited=FALSE; Dict_node *dict_node; char * dictionary_path_name; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); if (!rand_table_inited) { init_randtable(); rand_table_inited=TRUE; } dict->string_set = string_set_create(); dict->name = string_set_add(dict_name, dict->string_set); dict->num_entries = 0; dict->is_special = FALSE; dict->already_got_it = '\0'; dict->line_number = 1; dict->root = NULL; dict->word_file_header = NULL; dict->exp_list = NULL; dict->affix_table = NULL; /* *DS* remove this if (pp_name != NULL) { dict->post_process_filename = string_set_add(pp_name, dict->string_set); } else { dict->post_process_filename = NULL; } */ if (path != NULL) dictionary_path_name = path; else dictionary_path_name = dict_name; if (!open_dictionary(dictionary_path_name, dict)) { lperror(NODICT, dict_name); string_set_delete(dict->string_set); xfree(dict, sizeof(struct Dictionary_s)); return NULL; } if (!read_dictionary(dict)) { string_set_delete(dict->string_set); xfree(dict, sizeof(struct Dictionary_s)); return NULL; } dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->postprocessor = post_process_open(dict->name, pp_name); dict->constituent_pp = post_process_open(dict->name, cons_name); dict->affix_table = NULL; if (affix_name != NULL) { dict->affix_table = internal_dictionary_create(affix_name, NULL, NULL, NULL, dict_name); if (dict->affix_table == NULL) { fprintf(stderr, "%s\n", 
lperrmsg); exit(-1); } } dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = TRUE; dict->capitalized_word_defined = boolean_dictionary_lookup(dict, PROPER_WORD); dict->pl_capitalized_word_defined = boolean_dictionary_lookup(dict, PL_PROPER_WORD); dict->hyphenated_word_defined = boolean_dictionary_lookup(dict, HYPHENATED_WORD); dict->number_word_defined = boolean_dictionary_lookup(dict, NUMBER_WORD); dict->ing_word_defined = boolean_dictionary_lookup(dict, ING_WORD); dict->s_word_defined = boolean_dictionary_lookup(dict, S_WORD); dict->ed_word_defined = boolean_dictionary_lookup(dict, ED_WORD); dict->ly_word_defined = boolean_dictionary_lookup(dict, LY_WORD); dict->max_cost = 1000; if ((dict_node = dictionary_lookup(dict, ANDABLE_CONNECTORS_WORD)) != NULL) { dict->andable_connector_set = connector_set_create(dict_node->exp); } else { dict->andable_connector_set = NULL; } if ((dict_node = dictionary_lookup(dict, UNLIMITED_CONNECTORS_WORD)) != NULL) { dict->unlimited_connector_set = connector_set_create(dict_node->exp); } else { dict->unlimited_connector_set = NULL; } free_lookup_list(); return dict; }
/**
 * Program entry point.
 *
 * Parses the command line with gengetopt and dispatches on the options
 * given: --about, --decode (needs --dict), --PSNR <original>,<decoded>,
 * --encode, --parallel-encode (needs --threads > 1), --decode-dir and
 * --dict.  Prints a usage summary and exits with status 1 when called
 * without arguments.  On the normal path the execution time is printed
 * and 0 is returned.
 *
 * Changes from the previous version:
 *  - --decode without a dictionary file no longer passes a NULL path to
 *    read_dictionary(); it reports an error instead.
 *  - --PSNR with fewer than two comma-separated names no longer passes
 *    NULL to DEBUG("%s", ...) / calculatePSNR(); it reports an error.
 */
int main(int argc, char *argv[])
{
    double time_spent;
    clock_t begin, end;

    begin = clock();
    install_signal_handler();

    struct gengetopt_args_info args_info;
    cod_t cod_struct;
    dict_t dict_struct;
    int parser_ret;

    /** parse the user given parameters with gengetopt */
    parser_ret = cmdline_parser(argc, argv, &args_info);
    if (parser_ret != 0) {
        fprintf(stderr, "[ERROR] while calling cmdline_parser\n");
        exit(1);
    }

    /** Missing arguments: at least one argument must be provided */
    if (argc < 2) {
        printf("===================== HELP: Some Arguments Avaiable: =====================\n\n");
        printf("--encode --> use this to encode an image file, don't forget to supply the file to encode\n");
        printf("--parallel-encode --> use this to encode an image file with threads, don't forget to supply the file to encode and the number of threads\n");
        printf("--decode --> use this to decode an image file, don't forget to supply the file to decode\n");
        printf("--decode-dir --> use this to decode images in a given directory, suply the directory path\n");
        printf("--PSNR --> use this to calculate .... between the original and decoded file, must supply both files as arguments\n");
        printf("--dict --> use this to suply the dictionary file while encoding or decoding images\n");
        printf("--about --> use this to know about the this app programmers\n");
        printf("--help --> to know all the arguments avaiable\n\n");
        printf("================================================================================\n");
        exit(1);
    }

    /** About the authors */
    if (args_info.about_given) {
        about();
    }

    /** Decode the given file */
    if (args_info.decode_given) {
        /* Decoding needs a dictionary; refuse to hand a NULL path down. */
        if (args_info.dict_arg == NULL) {
            fprintf(stderr, "[ERROR] --dict <file> parameter is mandatory with --decode\n");
            exit(1);
        }
        cod_struct = read_cod_file(args_info.decode_arg);
        dict_struct = read_dictionary(args_info.dict_arg);
        decode_pgm(cod_struct, dict_struct, args_info.decode_arg);
    }

    /** Calculate PSNR */
    if (args_info.PSNR_given) {
        char delim[2] = ",";
        char *original_filename;
        char *decoded_filename;

        /* First name is before the delimiter, second name after it. */
        original_filename = (args_info.PSNR_arg != NULL)
            ? strtok(args_info.PSNR_arg, delim) : NULL;
        decoded_filename = (original_filename != NULL)
            ? strtok(NULL, delim) : NULL;

        if (original_filename == NULL || decoded_filename == NULL) {
            fprintf(stderr, "[ERROR] --PSNR expects two file names separated by ','\n");
            exit(1);
        }

        DEBUG("%s", original_filename);
        DEBUG("%s", decoded_filename);

        calculatePSNR(original_filename, decoded_filename);
    }

    /** ------------------- Project Delivery II ------------------------- */

    /** --encode argument given */
    if (args_info.encode_given) {
        printf("\n[TO BE DONE] option not implemented yet!!!\n");
    }

    /** --parallel-encode argument given */
    if (args_info.parallel_encode_given) {
        /** --threads argument not given */
        if (!args_info.threads_given) {
            fprintf(stderr, "[ERROR] --threads <nthreads> parameter is mandatory with --parallel-enconde\n");
            exit(1);
        } else {
            if (args_info.threads_arg > 1) {
                printf("\n[TO BE DONE] option not implemented yet!!!\n");
            } else {
                fprintf(stderr, "[ERROR] The threads number must be more than 1\n");
                exit(1);
            }
        }
    }

    if (args_info.decode_dir_given) {
        printf("\nOption not full implemented yet!!!\n");
    }

    /* NOTE(review): the extension is validated only after --decode has
     * already used the dictionary; confirm whether this should run first. */
    if (args_info.dict_given == 1) {
        validate_extension(args_info.dict_arg, ".dic");
    }

    /** free the memory allocated by gengetopt */
    cmdline_parser_free(&args_info);

    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    printf("Excution Time: %.3f s\n", time_spent);

    return 0;
}
int main(int argc, char *argv[]) { FILE *FptrNumDocs; ENVIRONMENT env; char *model_data_dir; char pathbuf[BUFSIZ]; char *col_label_file; int write_matlab; int col_labels_from_file; int rows, columns; MODEL_PARAMS model_params; MODEL_INFO model_info; env.word_array = NULL; env.word_tree = NULL; if ( argc != 17 ) usage_and_exit( argv[0], 1 ); if ( strcmp( argv[1], "-mdir" ) != 0 ) usage_and_exit( argv[0], 1 ); model_data_dir = argv[2]; if ( strcmp( argv[3], "-matlab" ) != 0 ) usage_and_exit( argv[0], 1 ); write_matlab = atoi( argv[4] ); if ( strcmp( argv[5], "-precontext" ) != 0 ) usage_and_exit( argv[0], 1 ); pre_context_size = atoi( argv[6] ); if ( strcmp( argv[7], "-postcontext" ) != 0 ) usage_and_exit( argv[0], 1 ); post_context_size = atoi( argv[8] ); if ( strcmp( argv[9], "-rows" ) != 0 ) usage_and_exit( argv[0], 1 ); rows = atoi( argv[10] ); if ( strcmp( argv[11], "-columns" ) != 0 ) usage_and_exit( argv[0], 1 ); columns = atoi( argv[12] ); if ( strcmp( argv[13], "-col_labels_from_file" ) != 0 ) usage_and_exit( argv[0], 1 ); col_labels_from_file = atoi( argv[14] ); if ( strcmp( argv[15], "-col_label_file" ) != 0 ) usage_and_exit( argv[0], 1 ); col_label_file = argv[16]; fprintf( stderr, "model data dir is \"%s\".\n", model_data_dir ); /** Read in current model params **/ sprintf( pathbuf, "%s/%s", model_data_dir, MODEL_PARAMS_BIN_FILE ); if ( !read_model_params( pathbuf, &model_params )) { die( "count_wordvec.c: couldn't read model data file\n" ); } sprintf( pathbuf, "%s/%s", model_data_dir, MODEL_INFO_BIN_FILE ); if ( !read_model_info( pathbuf, &model_info )) { die( "count_wordvec.c: couldn't read model info file\n" ); } if (model_params.rows < rows) { rows = model_params.rows; } else { model_params.rows = rows; } printf("count_wordvec.c: looking for %d rows\n", rows); printf("which had better match %d\n", model_params.rows); model_info.columns = columns; model_info.col_labels_from_file = col_labels_from_file; model_info.pre_context_size = pre_context_size; 
model_info.post_context_size = post_context_size; model_info.blocksize = BLOCKSIZE; model_info.start_columns = START_COLUMNS; message( "Reading the dictionary... "); if( !read_dictionary( &(env.word_array), &(env.word_tree), model_data_dir )) die( "count_wordvec.c: Can't read the dictionary.\n"); /*** read number of ducuments from file ***/ sprintf( pathbuf, "%s/%s", model_data_dir, FNUM_FILE ); if ( !my_fopen( &FptrNumDocs, pathbuf, "r" )) die( "couldn't open filenames file" ); if( !fscanf( FptrNumDocs, "%d", &numDocs )) die( "can't read numDocs" ); if( !my_fclose( &FptrNumDocs )) die( "couldn't close numDocs file" ); /*****/ /* Set some initial values in the matrix, arrays etc. */ if( !initialize_row_indices( env.word_array, &(env.row_indices), rows )) die( "Couldn't initialize row indices.\n"); if( !initialize_column_indices( env.word_array, &(env.col_indices), columns, col_labels_from_file, col_label_file, &(env.word_tree) )) die( "Couldn't initialize column indices.\n"); /* Allocate memory and set everything to zero. Defined in matrix.h */ if( !initialize_matrix( (MATRIX_TYPE***) &(env.matrix), rows, columns)) die( "Can't initialize matrix.\n"); /* Go through the wordlist, applying process_region to all regions. */ fprintf( stderr, "model data dir is \"%s\".\n", model_data_dir ); fprintf( stderr, "count_wordvec.c: about to call process_wordlist\n" ); if( !process_wordlist( is_target, advance_target, set_region_in, set_region_out, process_region , &env, model_data_dir)) die( "Couldn't process wordlist.\n"); /* Perform some conversion on the matrix. E.g. some kind of normalization. We traditionally take the square root of all entries. */ if( !transform_matrix( (MATRIX_TYPE **) (env.matrix), rows, columns)) die( "Couldn't transform matrix.\n"); /* Write the co-occurrence matrix. 
*/ message( "Writing the co-occurrence matrix.\n"); if( !write_matrix_svd((MATRIX_TYPE **) (env.matrix), rows, columns, model_data_dir )) die( "count_wordvec.c: couldn't write co-occurrence " "matrix in SVD input format.\n" ); if ( write_matlab ) { if ( !write_matrix_matlab( (MATRIX_TYPE **)(env.matrix), rows, columns, model_data_dir )) die( "count_wordvec.c: couldn't write co-occurrence " "matrix in Matlab input format.\n" ); } sprintf( pathbuf, "%s/%s", model_data_dir, MODEL_PARAMS_BIN_FILE ); if ( !write_model_params( pathbuf, &model_params )) { die( "count_wordvec.c: couldn't write model params file\n" ); } sprintf( pathbuf, "%s/%s", model_data_dir, MODEL_INFO_BIN_FILE ); if ( !write_model_info( pathbuf, &model_info )) { die( "count_wordvec.c: couldn't write model info file\n" ); } exit( EXIT_SUCCESS); }
/** * Read dictionary entries from a wide-character string "input". * All other parts are read from files. */ static Dictionary dictionary_six_str(const char * lang, const char * input, const char * dict_name, const char * pp_name, const char * cons_name, const char * affix_name, const char * regex_name) { const char * t; Dictionary dict; Dict_node *dict_node; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); memset(dict, 0, sizeof(struct Dictionary_s)); dict->num_entries = 0; dict->is_special = false; dict->already_got_it = '\0'; dict->line_number = 0; dict->root = NULL; dict->regex_root = NULL; dict->word_file_header = NULL; dict->exp_list = NULL; dict->affix_table = NULL; dict->recursive_error = false; dict->version = NULL; #ifdef HAVE_SQLITE dict->db_handle = NULL; #endif #ifdef USE_ANYSPLIT dict->anysplit = NULL; #endif /* Language and file-name stuff */ dict->string_set = string_set_create(); dict->lang = lang; t = strrchr (lang, '/'); if (t) dict->lang = string_set_add(t+1, dict->string_set); dict->name = string_set_add(dict_name, dict->string_set); /* * A special setup per dictionary type. The check here assumes the affix * dictionary name contains "affix". FIXME: For not using this * assumption, the dictionary creating stuff needs a rearrangement. */ if (0 == strstr(dict->name, "affix")) { /* To disable spell-checking, just set the checker to NULL */ dict->spell_checker = spellcheck_create(dict->lang); dict->insert_entry = insert_list; dict->lookup_list = lookup_list; dict->free_lookup = free_llist; dict->lookup = boolean_lookup; } else { /* * Affix dictionary. 
*/ size_t i; dict->insert_entry = load_affix; dict->lookup = return_true; /* initialize the class table */ dict->afdict_class = malloc(sizeof(*dict->afdict_class) * NUMELEMS(afdict_classname)); for (i = 0; i < NUMELEMS(afdict_classname); i++) { dict->afdict_class[i].mem_elems = 0; dict->afdict_class[i].length = 0; dict->afdict_class[i].string = NULL; } } dict->affix_table = NULL; /* Read dictionary from the input string. */ dict->input = input; dict->pin = dict->input; if (!read_dictionary(dict)) { dict->pin = NULL; dict->input = NULL; goto failure; } dict->pin = NULL; dict->input = NULL; if (NULL == affix_name) { /* * The affix table is handled alone in this invocation. * Skip the rest of processing! * FIXME: The dictionary creating stuff needs a rearrangement. */ return dict; } dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL); if (dict->affix_table == NULL) { prt_error("Error: Could not open affix file %s", affix_name); goto failure; } if (! afdict_init(dict)) goto failure; if (read_regex_file(dict, regex_name)) goto failure; if (compile_regexs(dict->regex_root, dict)) goto failure; #ifdef USE_CORPUS dict->corpus = lg_corpus_new(); #endif dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK); dict->base_knowledge = pp_knowledge_open(pp_name); dict->hpsg_knowledge = pp_knowledge_open(cons_name); dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = true; dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD); if (dict_node != NULL) { dict->unlimited_connector_set = connector_set_create(dict_node->exp); } else { dict->unlimited_connector_set = NULL; } free_lookup(dict_node); return dict; failure: string_set_delete(dict->string_set); if (dict->affix_table) xfree(dict->affix_table, sizeof(struct 
Dictionary_s)); xfree(dict, sizeof(struct Dictionary_s)); return NULL; }