// Print the current configuration. // Called by the setup menu 'show' command. int8_t Copter::setup_show(uint8_t argc, const Menu::arg *argv) { AP_Param *param; ap_var_type type; //If a parameter name is given as an argument to show, print only that parameter if(argc>=2) { param=AP_Param::find(argv[1].str, &type); if(!param) { cliSerial->printf("Parameter not found: '%s'\n", argv[1].str); return 0; } AP_Param::show(param, argv[1].str, type, cliSerial); return 0; } // clear the area print_blanks(8); report_version(); report_radio(); report_frame(); report_batt_monitor(); report_flight_modes(); report_ins(); report_compass(); report_optflow(); AP_Param::show_all(cliSerial); return(0); }
int main(int argc, char **argv) { FILE *f; char buf[8], *b, *s; int comkeep, oneline, prefer; nidr_save_exedir(progname = *argv, 0); /* 0 ==> no $PATH adjustment */ #ifdef NO_NIDR_keywds0 comkeep = oneline = prefer = 1; #else comkeep = oneline = prefer = 0; #endif nextarg: while((s = *++argv) && *s == '-') { for(;;) switch(*++s) { case 0: goto nextarg; #ifndef NO_NIDR_keywds0 case '1': ++oneline; continue; case 'c': ++comkeep; continue; case 'j': if (*++s || (s = *++argv)) { if (specadj(s)) return 1; goto nextarg; } goto usage1; case 'p': ++prefer; continue; #endif case 'h': return usage(!s[1] || !strcmp(s,"help") ? 0 : 1); #ifdef NIDR_DYLIB_DEBUG /* -s is a debugging option, not mentioned in usage() */ case 's': nidr_set_strict(1); goto nextarg; #endif case 'v': return report_version(); case '?': return usage(s[1] != 0); case '-': if (!s[1]) { s = *++argv; goto optsdone; } if (!strcmp(s,"-help")) return usage(0); if (!strcmp(s,"-version")) return report_version(); usage1: default: return usage(1); } } optsdone: /* default nidrin (set by nidr-scanner) is stdin */ f = stdout; NIDR_disallow_missing_start = 0; #ifdef NO_NIDR_keywds0 if (!s || specadj(s)) return usage(1); s = *++argv; #endif if (s) { if (argv[1] && argv[2]) return usage(1); if (!(nidrin = fopen(s,"r"))) { fprintf(stderr, "%s: Cannot open input file \"%s\"\n", progname, s); return 1; } if ((b = argv[1]) && !(f = fopen(b, "w"))) { fprintf(stderr, "%s: Cannot open output file \"%s\"\n", progname, b); return 1; } } b = buf; if (oneline + comkeep + prefer) { *b++ = '-'; if (oneline) *b++ = '1'; if (comkeep) *b++ = 'c'; if (prefer) *b++ = 'p'; *b = 0; } nidr_parse(buf, f); return 0; }
// One-time start-up sequence for the vehicle.
//
// Brings up the console and serial ports, loads parameters, then initialises
// the individual subsystems in the order written below, and finally sets
// ap.initialised. The order of the calls is significant (see the comments on
// the individual steps), so do not reorder them casually.
void Copter::init_ardupilot()
{
    if (!hal.gpio->usb_connected()) {
        // USB is not connected, this means UART0 may be a Xbee, with
        // its darned bricking problem. We can't write to it for at
        // least one second after powering up. Simplest solution for
        // now is to delay for 1 second. Something more elegant may be
        // added later
        delay(1000);
    }

    // initialise the console serial port
    serial_manager.init_console();

    // init vehicle capabilities
    init_capabilities();

    cliSerial->printf("\n\nInit " FIRMWARE_STRING "\n\nFree RAM: %u\n", (unsigned)hal.util->available_memory());

    //
    // Report the firmware version on the console (the check of the actual
    // EEPROM format version is done in the load_parameters function)
    //
    report_version();

    // load parameters from EEPROM
    load_parameters();

    BoardConfig.init();

    // initialise the remaining serial ports
    serial_manager.init();

    // init EPM cargo gripper
#if EPM_ENABLED == ENABLED
    epm.init();
#endif

    // initialise notify system
    // disable external leds if epm is enabled because of pin conflict on the APM
    notify.init(true);

    // initialise battery monitor
    battery.init();

    // Init RSSI
    rssi.init();

    barometer.init();

    // Register the mavlink service callback. This will run
    // anytime there are more than 5ms remaining in a call to
    // hal.scheduler->delay.
    hal.scheduler->register_delay_callback(mavlink_delay_cb_static, 5);

    // we start by assuming USB connected, as we initialised the serial
    // port with SERIAL0_BAUD. check_usb_mux() fixes this if need be.
    ap.usb_connected = true;
    check_usb_mux();

    // init the GCS connected to the console
    gcs[0].setup_uart(serial_manager, AP_SerialManager::SerialProtocol_Console, 0);

    // init telemetry port
    gcs[1].setup_uart(serial_manager, AP_SerialManager::SerialProtocol_MAVLink, 0);

    // setup serial port for telem2
    gcs[2].setup_uart(serial_manager, AP_SerialManager::SerialProtocol_MAVLink, 1);

    // setup serial port for fourth telemetry port (not used by default)
    gcs[3].setup_uart(serial_manager, AP_SerialManager::SerialProtocol_MAVLink, 2);

#if FRSKY_TELEM_ENABLED == ENABLED
    // setup frsky
    frsky_telemetry.init(serial_manager);
#endif

    // identify ourselves correctly with the ground station
    mavlink_system.sysid = g.sysid_this_mav;

#if LOGGING_ENABLED == ENABLED
    log_init();
#endif

    GCS_MAVLINK::set_dataflash(&DataFlash);

    // update motor interlock state
    update_using_interlock();

#if FRAME_CONFIG == HELI_FRAME
    // trad heli specific initialisation
    heli_init();
#endif

    init_rc_in();               // sets up rc channels from radio
    init_rc_out();              // sets up motors and output to escs

    // initialise which outputs Servo and Relay events can use:
    // everything that is not in the motor mask
    ServoRelayEvents.set_channel_mask(~motors.get_motor_mask());

    relay.init();

    /*
     *  setup the 'main loop is dead' check. Note that this relies on
     *  the RC library being initialised.
     */
    hal.scheduler->register_timer_failsafe(failsafe_check_static, 1000);

    // Do GPS init
    gps.init(&DataFlash, serial_manager);

    // the compass is only initialised when enabled by parameter
    if(g.compass_enabled)
        init_compass();

#if OPTFLOW == ENABLED
    // make optflow available to AHRS
    ahrs.set_optflow(&optflow);
#endif

    // init Location class
    Location_Class::set_ahrs(&ahrs);
#if AP_TERRAIN_AVAILABLE && AC_TERRAIN
    Location_Class::set_terrain(&terrain);
    wp_nav.set_terrain(&terrain);
#endif

    pos_control.set_dt(MAIN_LOOP_SECONDS);

    // init the optical flow sensor
    init_optflow();

#if MOUNT == ENABLED
    // initialise camera mount
    camera_mount.init(&DataFlash, serial_manager);
#endif

#if PRECISION_LANDING == ENABLED
    // initialise precision landing
    init_precland();
#endif

#ifdef USERHOOK_INIT
    USERHOOK_INIT
#endif

#if CLI_ENABLED == ENABLED
    if (g.cli_enabled) {
        // advertise the interactive setup on the console and, when they are
        // initialised and have a uart, on the first two telemetry links
        const char *msg = "\nPress ENTER 3 times to start interactive setup\n";
        cliSerial->println(msg);
        if (gcs[1].initialised && (gcs[1].get_uart() != NULL)) {
            gcs[1].get_uart()->println(msg);
        }
        if (num_gcs > 2 && gcs[2].initialised && (gcs[2].get_uart() != NULL)) {
            gcs[2].get_uart()->println(msg);
        }
    }
#endif // CLI_ENABLED

#if HIL_MODE != HIL_MODE_DISABLED
    while (barometer.get_last_update() == 0) {
        // the barometer begins updating when we get the first
        // HIL_STATE message
        gcs_send_text(MAV_SEVERITY_WARNING, "Waiting for first HIL_STATE message");
        delay(1000);
    }

    // set INS to HIL mode
    ins.set_hil_mode();
#endif

    // read Baro pressure at ground
    //-----------------------------
    init_barometer(true);

    // initialise sonar
#if CONFIG_SONAR == ENABLED
    init_sonar();
#endif

    // initialise AP_RPM library
    rpm_sensor.init();

    // initialise mission library
    mission.init();

    // initialise the flight mode and aux switch
    // ---------------------------
    reset_control_switch();
    init_aux_switches();

    startup_INS_ground();

    // set landed flags
    set_land_complete(true);
    set_land_complete_maybe(true);

    // we don't want writes to the serial port to cause us to pause
    // mid-flight, so set the serial ports non-blocking once we are
    // ready to fly
    serial_manager.set_blocking_writes_all(false);

    // enable CPU failsafe
    failsafe_enable();

    ins.set_raw_logging(should_log(MASK_LOG_IMU_RAW));
    ins.set_dataflash(&DataFlash);

    cliSerial->print("\nReady to FLY ");

    // flag that initialisation has completed
    ap.initialised = true;
}
void main(int argc, char *argv[]) { int verbosity; int vocab_size; int cutoff; int num_recs; int current_rec; int num_above_threshold; int num_to_output; int i; word_rec *records; char temp_word[750]; flag gt_set; flag top_set; /* Process command line */ report_version(&argc,argv); if (pc_flagarg( &argc, argv,"-help")) { fprintf(stderr,"wfreq2vocab : Generate a vocabulary file from a word frequency file.\n"); fprintf(stderr,"Usage : wfreq2vocab [ -top 20000 | -gt 10]\n"); fprintf(stderr," [ -records %d ]\n",DEFAULT_MAX_RECORDS); fprintf(stderr," [ -verbosity %d]\n",DEFAULT_VERBOSITY); fprintf(stderr," < .wfreq > .vocab\n"); exit(1); } cutoff = pc_intarg( &argc, argv, "-gt",-1); vocab_size = pc_intarg(&argc, argv, "-top",-1); num_recs = pc_intarg(&argc, argv, "-records",DEFAULT_MAX_RECORDS); verbosity = pc_intarg(&argc, argv, "-verbosity",DEFAULT_VERBOSITY); pc_report_unk_args(&argc,argv,verbosity); if (cutoff != -1) { gt_set = 1; } else { gt_set = 0; cutoff = 0; } if (vocab_size != -1) { top_set = 1; } else { top_set = 0; vocab_size = 0; } if (gt_set && top_set) { quit(-1,"wfreq2vocab : Error : Can't use both the -top and the -gt options.\n"); } if (!gt_set && !top_set) { vocab_size = 20000; } if (gt_set) { pc_message(verbosity,2,"wfreq2vocab : Will generate a vocabulary containing all words which\n occurred more that %d times. Reading wfreq stream from stdin...\n",cutoff); } else { pc_message(verbosity,2,"wfreq2vocab : Will generate a vocabulary containing the most\n frequent %d words. 
Reading wfreq stream from stdin...\n",vocab_size); } records = (word_rec *) rr_malloc(sizeof(word_rec)*num_recs); current_rec = 0; num_above_threshold = 0; while (!rr_feof(stdin)) { if (scanf("%s %d",temp_word,&(records[current_rec].count)) != 2) { if (!rr_feof(stdin)) { quit(-1,"Error reading unigram counts from standard input.\n"); } } else { records[current_rec].word = salloc(temp_word); if (gt_set && records[current_rec].count > cutoff) { num_above_threshold++; } current_rec++; } } /* Sort records in descending order of count */ qsort((void*) records,(size_t) current_rec, sizeof(word_rec),sort_by_count); if (gt_set) { num_to_output = num_above_threshold; } else { num_to_output = vocab_size; } if (current_rec<num_to_output) { num_to_output = current_rec; } /* Now sort the relevant records alphabetically */ qsort((void*) records,(size_t) num_to_output, sizeof(word_rec),sort_alpha); if (gt_set) { pc_message(verbosity,2,"Size of vocabulary = %d\n",num_to_output); } if (num_to_output>65535) { pc_message(verbosity,1,"Warning : Vocab size exceeds 65535. This will cause problems with \nother tools, since word id's are stored in 2 bytes.\n"); } /* Print the vocab to stdout */ printf("## Vocab generated by v2 of the CMU-Cambridge Statistcal\n"); printf("## Language Modeling toolkit.\n"); printf("##\n"); printf("## Includes %d words ",num_to_output); printf("##\n"); for (i=0;i<=num_to_output-1;i++) { printf("%s\n",records[i].word); } pc_message(verbosity,0,"wfreq2vocab : Done.\n"); exit(0); }
int oe_02_main (int argc, char **argv) { ng_t ng; arpa_lm_t arpa_ng; char input_string[500]; int num_of_args; char *args[MAX_ARGS]; char *lm_filename_arpa; char *lm_filename_binary; flag told_to_quit; flag inconsistant_parameters; flag backoff_from_unk_inc; flag backoff_from_unk_exc; flag backoff_from_ccs_inc; flag backoff_from_ccs_exc; flag arpa_lm; flag binary_lm; flag include_unks; char *fb_list_filename; char *probs_stream_filename; char *annotation_filename; char *text_stream_filename; char *oov_filename; char *ccs_filename; int generate_size; int random_seed; double log_base; char wlist_entry[1024]; char current_cc[200]; vocab_sz_t current_cc_id; FILE *context_cues_fp; int n; /* Process command line */ report_version(&argc,argv); if (pc_flagarg(&argc, argv,"-help") || argc == 1 || (strcmp(argv[1],"-binary") && strcmp(argv[1],"-arpa"))) { oe_02_help_message(); exit(1); } lm_filename_arpa = rr_salloc(pc_stringarg(&argc, argv,"-arpa","")); if (strcmp(lm_filename_arpa,"")) arpa_lm = 1; else arpa_lm = 0; lm_filename_binary = rr_salloc(pc_stringarg(&argc, argv,"-binary","")); if (strcmp(lm_filename_binary,"")) binary_lm = 1; else binary_lm = 0; if (arpa_lm && binary_lm) quit(-1,"Error : Can't use both -arpa and -binary flags.\n"); if (!arpa_lm && !binary_lm) quit(-1,"Error : Must specify either a binary or an arpa format language model.\n"); ccs_filename = rr_salloc(pc_stringarg(&argc, argv,"-context","")); if (binary_lm && strcmp(ccs_filename,"")) fprintf(stderr,"Warning - context cues file not needed with binary language model file.\nWill ignore it.\n"); pc_report_unk_args(&argc,argv,2); /* Load language model */ if (arpa_lm) { fprintf(stderr,"Reading in language model from file %s\n", lm_filename_arpa); load_arpa_lm(&arpa_ng,lm_filename_arpa); }else { fprintf(stderr,"Reading in language model from file %s\n", lm_filename_binary); load_lm(&ng,lm_filename_binary); } fprintf(stderr,"\nDone.\n"); n=arpa_lm? 
arpa_ng.n: ng.n; if (arpa_lm) { arpa_ng.context_cue = (flag *) rr_calloc(arpa_ng.table_sizes[0],sizeof(flag)); arpa_ng.no_of_ccs = 0; if (strcmp(ccs_filename,"")) { context_cues_fp = rr_iopen(ccs_filename); while (fgets (wlist_entry, sizeof (wlist_entry),context_cues_fp)) { if (strncmp(wlist_entry,"##",2)==0) continue; sscanf (wlist_entry, "%s ",current_cc); warn_on_wrong_vocab_comments(wlist_entry); if (sih_lookup(arpa_ng.vocab_ht,current_cc,¤t_cc_id) == 0) quit(-1,"Error : %s in the context cues file does not appear in the vocabulary.\n",current_cc); arpa_ng.context_cue[(unsigned short) current_cc_id] = 1; arpa_ng.no_of_ccs++; fprintf(stderr,"Context cue word : %s id = %lld\n",current_cc,current_cc_id); } rr_iclose(context_cues_fp); } } /* Process commands */ told_to_quit = 0; num_of_args = 0; while (!feof(stdin) && !told_to_quit) { printf("evallm : \n"); fgets(input_string, sizeof(input_string), stdin); if(strlen(input_string) < sizeof(input_string)-1) input_string[strlen(input_string)-1] = '\0'; //chop new-line else quit(1, "evallm input exceeds size of input buffer"); if (!feof(stdin)) { parse_comline(input_string,&num_of_args,args); log_base = pc_doublearg(&num_of_args,args,"-log_base",10.0); backoff_from_unk_inc = pc_flagarg(&num_of_args,args,"-backoff_from_unk_inc"); backoff_from_ccs_inc = pc_flagarg(&num_of_args,args,"-backoff_from_ccs_inc"); backoff_from_unk_exc = pc_flagarg(&num_of_args,args,"-backoff_from_unk_exc"); backoff_from_ccs_exc = pc_flagarg(&num_of_args,args,"-backoff_from_ccs_exc"); include_unks = pc_flagarg(&num_of_args,args,"-include_unks"); fb_list_filename = rr_salloc(pc_stringarg(&num_of_args,args,"-backoff_from_list","")); text_stream_filename = rr_salloc(pc_stringarg(&num_of_args,args,"-text","")); probs_stream_filename = rr_salloc(pc_stringarg(&num_of_args,args,"-probs","")); annotation_filename = rr_salloc(pc_stringarg(&num_of_args,args,"-annotate","")); oov_filename = rr_salloc(pc_stringarg(&num_of_args,args,"-oovs","")); 
generate_size = pc_intarg(&num_of_args,args,"-size",10000); random_seed = pc_intarg(&num_of_args,args,"-seed",-1); inconsistant_parameters = 0; if (backoff_from_unk_inc && backoff_from_unk_exc) { fprintf(stderr,"Error : Cannot specify both exclusive and inclusive forced backoff.\n"); fprintf(stderr,"Use only one of -backoff_from_unk_exc and -backoff_from_unk_inc\n"); inconsistant_parameters = 1; } if (backoff_from_ccs_inc && backoff_from_ccs_exc) { fprintf(stderr,"Error : Cannot specify both exclusive and inclusive forced backoff.\n"); fprintf(stderr,"Use only one of -backoff_from_ccs_exc and -backoff_from_ccs_inc\n"); inconsistant_parameters = 1; } if (num_of_args > 0) { if (!inconsistant_parameters) { if (!strcmp(args[0],"perplexity")) { compute_perplexity(&ng, &arpa_ng, text_stream_filename, probs_stream_filename, annotation_filename, oov_filename, fb_list_filename, backoff_from_unk_inc, backoff_from_unk_exc, backoff_from_ccs_inc, backoff_from_ccs_exc, arpa_lm, include_unks, log_base); }else /* do perplexity sentence by sentence [20090612] (air) */ if (!strcmp(args[0],"uttperp")) { FILE *uttfh,*tempfh; char utt[4096]; /* live dangerously... */ char tmpfil[128]; if ((uttfh = fopen(text_stream_filename,"r")) == NULL) { printf("Error: can't open %s\n",text_stream_filename); exit(1); } char *template = "uttperp_XXXXXX";// CHANGED HLW mkstemp(template);// CHANGED HLW
int oe_15_main( int argc, char **argv ) { FILE **fin; ngram *ng; ngram outng; flag *done, finished; int i, j, nfiles; /* Process the command line */ report_version(&argc,argv); procComLine( &argc, argv ) ; if( argc < 2 ) { printUsage( argv[0] ) ; exit( 1 ) ; } nfiles = argc - 1; /* allocate memory */ fin = (FILE **) rr_malloc( sizeof( FILE *) * nfiles ); done = (flag *) rr_malloc( sizeof( flag ) * nfiles ); ng = (ngram *) rr_malloc( sizeof( ngram ) * nfiles ); for( i = 0; i < nfiles; i++ ) { ng[i].id_array = (id__t *) rr_calloc( n, sizeof( id__t ) ); ng[i].n = n; } outng.id_array = (id__t *) rr_calloc( n, sizeof( id__t ) ); outng.n = n; /* open the input files */ for( i = 0; i < nfiles; i++ ) fin[i] = rr_iopen( argv[i+1] ); /* read first ngram from each file */ for( i = 0; i < nfiles; i++ ) { done[i] = 0; if ( !get_ngram( fin[i], &ng[i], ascii_in ) ) done[i] = 1; } finished = 0; while ( !finished ) { /* set outng to max possible */ for( i = 0; i < n; i++ ) outng.id_array[i] = MAX_VOCAB_SIZE; /* find smallest ngram */ for( i = 0; i < nfiles; i++ ) { if ( !done[i] ) if ( cmp_ngram( &outng, &ng[i] ) > 0 ) for( j = 0; j < n; j++ ) outng.id_array[j] = ng[i].id_array[j]; } outng.count = 0; for( i = 0; i < nfiles; i++ ) { if ( !done[i] ) { /* add counts of equal ngrams */ if ( cmp_ngram( &outng, &ng[i] ) == 0 ) { outng.count += ng[i].count; if ( !get_ngram( fin[i], &ng[i], ascii_in ) ) { /* check if all files done */ done[i] = 1; finished = 1; for( j = 0; j < nfiles; j++ ) if ( ! done[j] ) finished = 0; } } } } write_ngram( stdout, &outng, ascii_out ); } for( i = 0; i < nfiles; i++ ) rr_iclose( fin[i] ); fprintf(stderr,"mergeidngram : Done.\n"); return( 0 ); }
void main(int argc, char *argv[]) { int i,j; char *vocab_filename; FILE *tempfile; char tempfiles_directory[1000]; int vocab_size; FILE *vocab_file; int verbosity; int buffer_size; int position_in_buffer; int number_of_tempfiles; int max_files; int fof_size; unsigned short *buffer; unsigned short *placeholder; unsigned short *temp_ngram; int temp_count; char temp_word[500]; char temp_word2[500]; char *temp_file_root; char *temp_file_ext; char *host_name; int proc_id; struct utsname uname_info; flag write_ascii; /* Vocab hash table things */ struct hash_table vocabulary; unsigned long hash_size; unsigned long M; tempfile = NULL; /* Just to prevent compilation warnings. */ report_version(&argc,argv); verbosity = pc_intarg(&argc,argv,"-verbosity",DEFAULT_VERBOSITY); /* Process command line */ if (pc_flagarg( &argc, argv,"-help") || argc==1) { fprintf(stderr,"text2idngram - Convert a text stream to an id n-gram stream.\n"); fprintf(stderr,"Usage : text2idngram -vocab .vocab \n"); fprintf(stderr," [ -buffer 100 ]\n"); fprintf(stderr," [ -hash %d ]\n",DEFAULT_HASH_SIZE); fprintf(stderr," [ -temp %s ]\n",DEFAULT_TEMP); fprintf(stderr," [ -files %d ]\n",DEFAULT_MAX_FILES); fprintf(stderr," [ -gzip | -compress ]\n"); fprintf(stderr," [ -verbosity %d ]\n", DEFAULT_VERBOSITY); fprintf(stderr," [ -n 3 ]\n"); fprintf(stderr," [ -write_ascii ]\n"); fprintf(stderr," [ -fof_size 10 ]\n"); exit(1); } pc_message(verbosity,2,"text2idngram\n"); n = pc_intarg( &argc, argv, "-n",DEFAULT_N); placeholder = (unsigned short *) rr_malloc(sizeof(unsigned short)*n); temp_ngram = (unsigned short *) rr_malloc(sizeof(unsigned short)*n); hash_size = pc_intarg( &argc, argv, "-hash",DEFAULT_HASH_SIZE); buffer_size = pc_intarg( &argc, argv, "-buffer",STD_MEM); write_ascii = pc_flagarg(&argc,argv,"-write_ascii"); fof_size = pc_intarg(&argc,argv,"-fof_size",10); max_files = pc_intarg( &argc, argv, "-files",DEFAULT_MAX_FILES); vocab_filename = salloc(pc_stringarg( &argc, argv, "-vocab", "" )); if 
(!strcmp("",vocab_filename)) { quit(-1,"text2idngram : Error : Must specify a vocabulary file.\n"); } strcpy(tempfiles_directory,pc_stringarg( &argc, argv, "-temp", DEFAULT_TEMP)); if (pc_flagarg(&argc,argv,"-compress")) { temp_file_ext = salloc(".Z"); } else { if (pc_flagarg(&argc,argv,"-gzip")) { temp_file_ext = salloc(".gz"); } else { temp_file_ext = salloc(""); } } uname(&uname_info); host_name = salloc(uname_info.nodename); proc_id = getpid(); sprintf(temp_word,"%s%s.%d.",TEMP_FILE_ROOT,host_name,proc_id); temp_file_root = salloc(temp_word); pc_report_unk_args(&argc,argv,verbosity); /* If the last charactor in the directory name isn't a / then add one. */ if (tempfiles_directory[strlen(tempfiles_directory)-1] != '/') { strcat(tempfiles_directory,"/"); } pc_message(verbosity,2,"Vocab : %s\n",vocab_filename); pc_message(verbosity,2,"N-gram buffer size : %d\n",buffer_size); pc_message(verbosity,2,"Hash table size : %d\n",hash_size); pc_message(verbosity,2,"Temp directory : %s\n",tempfiles_directory); pc_message(verbosity,2,"Max open files : %d\n",max_files); pc_message(verbosity,2,"FOF size : %d\n",fof_size); pc_message(verbosity,2,"n : %d\n",n); buffer_size *= (1000000/(sizeof(unsigned short)*n)); /* Allocate memory for hash table */ fprintf(stderr,"Initialising hash table...\n"); M = nearest_prime(hash_size); new_hashtable(&vocabulary,M); /* Read in the vocabulary */ vocab_size = 0; vocab_file = rr_iopen(vocab_filename); pc_message(verbosity,2,"Reading vocabulary...\n"); while (fgets (temp_word, sizeof(temp_word),vocab_file)) { if (strncmp(temp_word,"##",2)==0) continue; sscanf (temp_word, "%s ",temp_word2); /* Check for repeated words in the vocabulary */ if (index2(&vocabulary,temp_word2) != 0) { fprintf(stderr,"======================================================\n"); fprintf(stderr,"WARNING: word %s is repeated in the vocabulary.\n",temp_word); fprintf(stderr,"=======================================================\n"); } if (strncmp(temp_word,"#",1)==0) 
{ fprintf(stderr,"\n\n===========================================================\n"); fprintf(stderr,":\nWARNING: line assumed NOT a comment:\n"); fprintf(stderr, ">>> %s <<<\n",temp_word); fprintf(stderr, " '%s' will be included in the vocabulary.\n",temp_word2); fprintf(stderr, " (comments must start with '##')\n"); fprintf(stderr,"===========================================================\n\n"); } vocab_size++; add_to_hashtable(&vocabulary,hash(temp_word2,M),temp_word2,vocab_size); } if (vocab_size > MAX_VOCAB_SIZE) { quit(-1,"text2idngram : Error : Vocabulary size exceeds maximum.\n"); } pc_message(verbosity,2,"Allocating memory for the n-gram buffer...\n"); buffer=(unsigned short*) rr_malloc(n*(buffer_size+1)*sizeof(unsigned short)); number_of_tempfiles = 0; /* Read text into buffer */ /* Read in the first ngram */ position_in_buffer = 0; for (i=0;i<=n-1;i++) { get_word(stdin,temp_word); add_to_buffer(index2(&vocabulary,temp_word),0,i,buffer); } while (!rr_feof(stdin)) { /* Fill up the buffer */ pc_message(verbosity,2,"Reading text into the n-gram buffer...\n"); pc_message(verbosity,2,"20,000 n-grams processed for each \".\", 1,000,000 for each line.\n"); while ((position_in_buffer<buffer_size) && (!rr_feof(stdin))) { position_in_buffer++; if (position_in_buffer % 20000 == 0) { if (position_in_buffer % 1000000 == 0) { pc_message(verbosity,2,".\n"); } else { pc_message(verbosity,2,"."); } } for (i=1;i<=n-1;i++) { add_to_buffer(buffer_contents(position_in_buffer-1,i,buffer), position_in_buffer,i-1,buffer); } if (get_word(stdin,temp_word) == 1) { add_to_buffer(index2(&vocabulary,temp_word),position_in_buffer, n-1,buffer); } } for (i=0;i<=n-1;i++) { placeholder[i] = buffer_contents(position_in_buffer,i,buffer); } /* Sort buffer */ pc_message(verbosity,2,"\nSorting n-grams...\n"); qsort((void*) buffer,(size_t) position_in_buffer, n*sizeof(unsigned short),compare_ngrams); /* Output the buffer to temporary BINARY file */ number_of_tempfiles++; 
sprintf(temp_word,"%s%s%hu%s",tempfiles_directory,temp_file_root, number_of_tempfiles,temp_file_ext); pc_message(verbosity,2,"Writing sorted n-grams to temporary file %s\n", temp_word); tempfile = rr_oopen(temp_word); for (i=0;i<=n-1;i++) { temp_ngram[i] = buffer_contents(0,i,buffer); if (temp_ngram[i] > MAX_VOCAB_SIZE) { quit(-1,"Invalid trigram in buffer.\nAborting"); } } temp_count = 1; for (i=1;i<=position_in_buffer;i++) { if (!compare_ngrams(temp_ngram,&buffer[i*n])) { temp_count++; } else { for (j=0;j<=n-1;j++) { rr_fwrite(&temp_ngram[j],sizeof(unsigned short),1, tempfile,"temporary n-gram ids"); temp_ngram[j] = buffer_contents(i,j,buffer); } rr_fwrite(&temp_count,sizeof(int),1,tempfile, "temporary n-gram counts"); temp_count = 1; } } rr_oclose(tempfile); for (i=0;i<=n-1;i++) { add_to_buffer(placeholder[i],0,i,buffer); } position_in_buffer = 0; } /* Merge the temporary files, and output the result to standard output */ pc_message(verbosity,2,"Merging temporary files...\n"); merge_tempfiles(1, number_of_tempfiles, temp_file_root, temp_file_ext, max_files, tempfiles_directory, stdout, write_ascii, fof_size); pc_message(verbosity,0,"text2idngram : Done.\n"); exit(0); }
int main (int argc, char **argv) { int n; int verbosity; int max_files; int max_words; int max_chars; int current_word; int current_char; int start_char; /* start boundary (possibly > than 0) */ int no_of_spaces; int pos_in_string; int i; char *current_string; char current_temp_filename[500]; int current_file_number; FILE *temp_file; flag text_buffer_full; char *text_buffer; char **pointers; char current_ngram[500]; int current_count; int counter; char temp_directory[1000]; char *temp_file_ext; flag words_set; flag chars_set; /* Process command line */ verbosity = pc_intarg(&argc, argv,"-verbosity",DEFAULT_VERBOSITY); pc_message(verbosity,2,"text2wngram\n"); report_version(&argc,argv); if (pc_flagarg( &argc, argv,"-help")) { help_message(); exit(1); } n = pc_intarg(&argc, argv,"-n",DEFAULT_N); /* max_words = pc_intarg(&argc, argv,"-words",STD_MEM*1000000/11); max_chars = pc_intarg(&argc, argv,"-chars",STD_MEM*7000000/11); */ max_words = pc_intarg(&argc, argv,"-words",-1); max_chars = pc_intarg(&argc, argv,"-chars",-1); if (max_words == -1) { words_set = 0; max_words = STD_MEM*1000000/11; }else words_set = 1; if (max_chars == -1) { chars_set = 0; max_chars = STD_MEM*7000000/11; }else chars_set = 1; max_files = pc_intarg(&argc, argv,"-files",DEFAULT_MAX_FILES); if (pc_flagarg(&argc,argv,"-compress")) temp_file_ext = salloc(".Z"); else { if (pc_flagarg(&argc,argv,"-gzip")) temp_file_ext = salloc(".gz"); else temp_file_ext = salloc(""); } strcpy(temp_directory, "cmuclmtk-XXXXXX"); if (mkdtemp(temp_directory) == NULL) { quit(-1, "Failed to create temporary folder: %s\n", strerror(errno)); } pc_report_unk_args(&argc,argv,verbosity); if (words_set && !chars_set) max_chars = max_words * 7; if (!words_set && chars_set) max_words = max_chars / 7; /* If the last charactor in the directory name isn't a / then add one. 
*/ pc_message(verbosity,2,"n = %d\n",n); pc_message(verbosity,2,"Number of words in buffer = %d\n",max_words); pc_message(verbosity,2,"Number of chars in buffer = %d\n",max_chars); pc_message(verbosity,2,"Max number of files open at once = %d\n",max_files); pc_message(verbosity,2,"Temporary directory = %s\n",temp_directory); /* Allocate memory for the buffers */ text_buffer = (char *) rr_malloc(sizeof(char)*max_chars); pc_message(verbosity,2,"Allocated %d bytes to text buffer.\n", sizeof(char)*max_chars); pointers = (char **) rr_malloc(sizeof(char *)*max_words); pc_message(verbosity,2,"Allocated %d bytes to pointer array.\n", sizeof(char *)*max_words); current_file_number = 0; current_word = 1; start_char = 0; current_char = 0; counter = 0; pointers[0] = text_buffer; while (!feof(stdin)) { current_file_number++; /* Read text into buffer */ pc_message(verbosity,2,"Reading text into buffer...\n"); pc_message(verbosity,2,"Reading text into the n-gram buffer...\n"); pc_message(verbosity,2,"20,000 words processed for each \".\", 1,000,000 for each line.\n"); pointers[0] = text_buffer; while ((!rr_feof(stdin)) && (current_word < max_words) && (current_char < max_chars)) { text_buffer[current_char] = getchar(); if (text_buffer[current_char] == '\n' || text_buffer[current_char] == '\t' ) { text_buffer[current_char] = ' '; } if (text_buffer[current_char] == ' ') { if (current_char > start_char) { if (text_buffer[current_char-1] == ' ') { current_word--; current_char--; } pointers[current_word] = &(text_buffer[current_char+1]); current_word++; counter++; if (counter % 20000 == 0) { if (counter % 1000000 == 0) pc_message(verbosity,2,"\n"); else pc_message(verbosity,2,"."); } } } if (text_buffer[current_char] != ' ' || current_char > start_char) current_char++; } text_buffer[current_char]='\0'; if (current_word == max_words || rr_feof(stdin)) { for (i=current_char+1;i<=max_chars-1;i++) text_buffer[i] = ' '; text_buffer_full = 0; }else text_buffer_full = 1; /* Sort buffer */ 
pc_message(verbosity,2,"\nSorting pointer array...\n"); qsort((void *) pointers,(size_t) current_word-n,sizeof(char *),cmp_strings); /* Write out temporary file */ sprintf(current_temp_filename,"%s/%hu%s",temp_directory, current_file_number, temp_file_ext); pc_message(verbosity,2,"Writing out temporary file %s...\n",current_temp_filename); temp_file = rr_oopen(current_temp_filename); text_buffer[current_char] = ' '; current_count = 0; strcpy(current_ngram,""); for (i = 0; i <= current_word-n; i++) { current_string = pointers[i]; /* Find the nth space */ no_of_spaces = 0; pos_in_string = 0; while (no_of_spaces < n) { if (current_string[pos_in_string] == ' ') no_of_spaces++; pos_in_string++; } if (!strncmp(current_string,current_ngram,pos_in_string)) current_count++; else { if (strcmp(current_ngram,"")) if (fprintf(temp_file,"%s %d\n",current_ngram,current_count) < 0) quit(-1,"Error writing to temporary file %s\n",current_temp_filename); current_count = 1; strncpy(current_ngram,current_string,pos_in_string); current_ngram[pos_in_string] = '\0'; } } rr_oclose(temp_file); /* Move the last n-1 words to the beginning of the buffer, and set correct current_word and current_char things */ strcpy(text_buffer,pointers[current_word-n]); pointers[0]=text_buffer; /* Find the (n-1)th space */ no_of_spaces=0; pos_in_string=0; if (!text_buffer_full){ while (no_of_spaces<(n-1)) { if (pointers[0][pos_in_string]==' ') { no_of_spaces++; pointers[no_of_spaces] = &pointers[0][pos_in_string+1]; } pos_in_string++; } }else { while (no_of_spaces<n) { if (pointers[0][pos_in_string]==' ') { no_of_spaces++; pointers[no_of_spaces] = &pointers[0][pos_in_string+1]; } pos_in_string++; } pos_in_string--; } current_char = pos_in_string; current_word = n; /* mark boundary beyond which counting pass cannot backup */ start_char = current_char; } /* Merge temporary files */ pc_message(verbosity,2,"Merging temporary files...\n"); merge_tempfiles(1, current_file_number, temp_directory, temp_file_ext, 
max_files, stdout, n, verbosity); rmdir(temp_directory); pc_message(verbosity,0,"text2wngram : Done.\n"); return 0; }
int oe_03_main (int argc, char **argv) { flag first_ngram; int n; fof_sz_t fof_size; flag is_ascii; int verbosity; fof_t **fof_array; ngram_sz_t *num_kgrams; ngram current_ngram; ngram previous_ngram; count_t *ng_count; int pos_of_novelty; int nlines; int i; report_version(&argc,argv); if (argc == 1 || pc_flagarg(&argc, argv,"-help")) { oe_04_help_message(); exit(1); } is_ascii = pc_flagarg(&argc, argv,"-ascii_input"); n = pc_intarg(&argc, argv,"-n",3); fof_size = pc_intarg(&argc, argv,"-fof_size",50); verbosity = pc_intarg(&argc, argv,"-verbosity",DEFAULT_VERBOSITY); pc_report_unk_args(&argc,argv,verbosity); pc_message(verbosity,2,"n = %d\n",n); pc_message(verbosity,2,"fof_size = %d\n",fof_size); current_ngram.n = n; previous_ngram.n = n; pos_of_novelty = n; fof_array = (fof_t **) rr_malloc(sizeof(fof_t *) * (n-1)); for (i=0;i<=n-2;i++) fof_array[i] = (fof_t *) rr_calloc(fof_size+1,sizeof(fof_t)); num_kgrams = (ngram_sz_t *) rr_calloc(n-1,sizeof(ngram_sz_t)); ng_count = (count_t *) rr_calloc(n-1,sizeof(count_t)); current_ngram.id_array = (id__t *) rr_calloc(n,sizeof(id__t)); previous_ngram.id_array = (id__t *) rr_calloc(n,sizeof(id__t)); pc_message(verbosity,2,"Processing id n-gram file.\n"); pc_message(verbosity,2,"20,000 n-grams processed for each \".\", 1,000,000 for each line.\n"); nlines = 0; first_ngram = 1; while (!rr_feof(stdin)) { if (!first_ngram) ngram_copy(&previous_ngram,¤t_ngram,n); if (get_ngram(stdin,¤t_ngram,is_ascii)) { nlines++; show_idngram_nlines(nlines, verbosity); /* Test for where this ngram differs from last - do we have an out-of-order ngram? 
*/ if (!first_ngram) pos_of_novelty = ngram_find_pos_of_novelty(¤t_ngram,&previous_ngram,n,nlines); else pos_of_novelty = 0; /* Add new N-gram */ num_kgrams[n-2]++; if (current_ngram.count <= fof_size) fof_array[n-2][current_ngram.count]++; if (!first_ngram) { for (i=n-2;i>=MAX(1,pos_of_novelty);i--) { num_kgrams[i-1]++; if (ng_count[i-1] <= fof_size) fof_array[i-1][ng_count[i-1]]++; ng_count[i-1] = current_ngram.count; } } else { for (i=n-2;i>=MAX(1,pos_of_novelty);i--) ng_count[i-1] = current_ngram.count; } for (i=0;i<=pos_of_novelty-2;i++) ng_count[i] += current_ngram.count; if (first_ngram) first_ngram = 0; } } /* Process last ngram */ for (i=n-2;i>=MAX(1,pos_of_novelty);i--) { num_kgrams[i-1]++; if (ng_count[i-1] <= fof_size) { fof_array[i-1][ng_count[i-1]]++; } ng_count[i-1] = current_ngram.count; } #import "OpenEarsStaticAnalysisToggle.h" #ifdef STATICANALYZEDEPENDENCIES #define __clang_analyzer__ 1 #endif #if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES) #undef __clang_analyzer__ for (i=0;i<=pos_of_novelty-2;i++) ng_count[i] += current_ngram.count; display_fof_array(num_kgrams,fof_array,fof_size,stderr, n); #endif pc_message(verbosity,0,"idngram2stats : Done.\n"); exit(0); }
// One-time vehicle start-up sequence.
//
// Brings up the console, loads parameters, then initialises each subsystem
// in turn (serial ports, notify, battery, barometer, MAVLink channels, RC
// input/output, joystick, relay, GPS, compass, optional optical flow /
// terrain / avoidance / mount, rangefinder, RPM, mission, INS) and finally
// sets ap.initialised. Several steps are compiled in only when the matching
// feature macro is enabled. The statements are order dependent (for example
// the failsafe timer comment notes that it relies on the RC library already
// being initialised), so do not reorder them casually.
void Sub::init_ardupilot()
{
    if (!hal.gpio->usb_connected()) {
        // USB is not connected, this means UART0 may be a Xbee, with
        // its darned bricking problem. We can't write to it for at
        // least one second after powering up. Simplest solution for
        // now is to delay for 1 second. Something more elegant may be
        // added later
        hal.scheduler->delay(1000);
    }

    // initialise serial port
    serial_manager.init_console();

    cliSerial->printf("\n\nInit " FIRMWARE_STRING
                      "\n\nFree RAM: %u\n",
                      (unsigned)hal.util->available_memory());

    //
    // Report the firmware version on the console (the check of the actual
    // EEPROM format version is done in the load_parameters function)
    //
    report_version();

    // load parameters from EEPROM
    load_parameters();

    BoardConfig.init();

    // initialise serial port
    serial_manager.init();

    // init cargo gripper
#if GRIPPER_ENABLED == ENABLED
    g2.gripper.init();
#endif

    // initialise notify system
    notify.init(true);

    // initialise battery monitor
    battery.init();

    barometer.init();

    celsius.init();

    // Register the mavlink service callback. This will run
    // anytime there are more than 5ms remaining in a call to
    // hal.scheduler->delay.
    hal.scheduler->register_delay_callback(mavlink_delay_cb_static, 5);

    // we start by assuming USB connected, as we initialised the serial
    // port with SERIAL0_BAUD. check_usb_mux() fixes this if need be.
    ap.usb_connected = true;
    check_usb_mux();

    // setup telem slots with serial ports
    for (uint8_t i = 0; i < MAVLINK_COMM_NUM_BUFFERS; i++) {
        gcs_chan[i].setup_uart(serial_manager, AP_SerialManager::SerialProtocol_MAVLink, i);
    }

    // identify ourselves correctly with the ground station
    mavlink_system.sysid = g.sysid_this_mav;

#if LOGGING_ENABLED == ENABLED
    log_init();
#endif

    gcs().set_dataflash(&DataFlash);

    init_rc_in();               // sets up rc channels from radio
    init_rc_out();              // sets up motors and output to escs
    init_joystick();            // joystick initialization

    // initialise which outputs Servo and Relay events can use
    // (the mask passed is the complement of the motor mask)
    ServoRelayEvents.set_channel_mask(~motors.get_motor_mask());

    relay.init();

    /*
     *  setup the 'main loop is dead' check. Note that this relies on
     *  the RC library being initialised.
     */
    hal.scheduler->register_timer_failsafe(failsafe_check_static, 1000);

    // Do GPS init
    gps.init(&DataFlash, serial_manager);

    if (g.compass_enabled) {
        init_compass();
    }

#if OPTFLOW == ENABLED
    // make optflow available to AHRS
    ahrs.set_optflow(&optflow);
#endif

    // init Location class
    Location_Class::set_ahrs(&ahrs);
#if AP_TERRAIN_AVAILABLE && AC_TERRAIN
    Location_Class::set_terrain(&terrain);
    wp_nav.set_terrain(&terrain);
#endif

#if AVOIDANCE_ENABLED == ENABLED
    wp_nav.set_avoidance(&avoid);
#endif

    pos_control.set_dt(MAIN_LOOP_SECONDS);

    // init the optical flow sensor
    init_optflow();

#if MOUNT == ENABLED
    // initialise camera mount
    camera_mount.init(&DataFlash, serial_manager);
#endif

#ifdef USERHOOK_INIT
    USERHOOK_INIT
#endif

#if CLI_ENABLED == ENABLED
    if (g.cli_enabled) {
        // Announce the interactive setup prompt on the console and on
        // telemetry channels 1 and 2 when those are initialised.
        const char *msg = "\nPress ENTER 3 times to start interactive setup\n";
        cliSerial->println(msg);
        if (gcs_chan[1].initialised && (gcs_chan[1].get_uart() != NULL)) {
            gcs_chan[1].get_uart()->println(msg);
        }
        if (num_gcs > 2 && gcs_chan[2].initialised && (gcs_chan[2].get_uart() != NULL)) {
            gcs_chan[2].get_uart()->println(msg);
        }
    }
#endif // CLI_ENABLED

#if HIL_MODE != HIL_MODE_DISABLED
    while (barometer.get_last_update() == 0) {
        // the barometer begins updating when we get the first
        // HIL_STATE message
        gcs_send_text(MAV_SEVERITY_WARNING, "Waiting for first HIL_STATE message");
        hal.scheduler->delay(1000);
    }

    // set INS to HIL mode
    ins.set_hil_mode();
#endif

    // read Baro pressure at ground
    //-----------------------------
    init_barometer(false);
    barometer.update();

    // Use the first healthy barometer of type BARO_TYPE_WATER as the
    // primary one and flag that a depth sensor is present.
    for (uint8_t i = 0; i < barometer.num_instances(); i++) {
        if (barometer.get_type(i) == AP_Baro::BARO_TYPE_WATER && barometer.healthy(i)) {
            barometer.set_primary_baro(i);
            ap.depth_sensor_present = true;
            break;
        }
    }

    if (!ap.depth_sensor_present) {
        // We only have onboard baro
        // No external underwater depth sensor detected
        barometer.set_primary_baro(0);
        EKF2.set_baro_alt_noise(10.0f); // Readings won't correspond with rest of INS
        EKF3.set_baro_alt_noise(10.0f);
    } else {
        EKF2.set_baro_alt_noise(0.1f);
        EKF3.set_baro_alt_noise(0.1f);
    }

    leak_detector.init();

    // backwards compatibility: raise (and save) the yaw acceleration limit
    // when the stored value is below 110000
    if (attitude_control.get_accel_yaw_max() < 110000.0f) {
        attitude_control.save_accel_yaw_max(110000.0f);
    }

    last_pilot_heading = ahrs.yaw_sensor;

    // initialise rangefinder
#if RANGEFINDER_ENABLED == ENABLED
    init_rangefinder();
#endif

    // initialise AP_RPM library
#if RPM_ENABLED == ENABLED
    rpm_sensor.init();
#endif

    // initialise mission library
    mission.init();

    startup_INS_ground();

    // we don't want writes to the serial port to cause us to pause
    // mid-flight, so set the serial ports non-blocking once we are
    // ready to fly
    serial_manager.set_blocking_writes_all(false);

    // enable CPU failsafe
    failsafe_enable();

    ins.set_raw_logging(should_log(MASK_LOG_IMU_RAW));
    ins.set_dataflash(&DataFlash);

    // init vehicle capabilities
    init_capabilities();

    cliSerial->print("\nReady to FLY ");

    // flag that initialisation has completed
    ap.initialised = true;
}
int main(int argc, char *argv[]) { int verbosity; int vocab_size; FILE *vocab_file; int buffer_size; flag write_ascii; int max_files; int number_of_tempfiles; char *vocab_filename; char *idngram_filename; char temp_word[MAX_WORD_LENGTH]; char temp_word2[MAX_WORD_LENGTH]; char temp_word3[MAX_WORD_LENGTH]; flag contains_unks; int position_in_buffer; FILE *outfile; FILE *tempfile; FILE *non_unk_fp; ngram_rec *buffer; flag same_ngram; int i; int j; int fof_size; int size_of_rec; char temp_directory[1000]; char *temp_file_ext; /* Vocab hash table things */ struct idngram_hash_table vocabulary; unsigned long hash_size; unsigned long M; wordid_t *current_ngram; int current_count; wordid_t *sort_ngram; int sort_count; /* Process command line */ report_version(&argc,argv); if (argc == 1 || pc_flagarg(&argc, argv,"-help")) { /* Display help message */ help_message(); exit(1); } n = pc_intarg( &argc, argv, "-n",DEFAULT_N); hash_size = pc_intarg( &argc, argv, "-hash",DEFAULT_HASH_SIZE); buffer_size = pc_intarg( &argc, argv, "-buffer",STD_MEM); write_ascii = pc_flagarg(&argc,argv,"-write_ascii"); verbosity = pc_intarg(&argc,argv,"-verbosity",DEFAULT_VERBOSITY); max_files = pc_intarg( &argc, argv, "-files",DEFAULT_MAX_FILES); fof_size = pc_intarg(&argc,argv,"-fof_size",10); vocab_filename = salloc(pc_stringarg( &argc, argv, "-vocab", "" )); idngram_filename = salloc(pc_stringarg( &argc, argv, "-idngram", "" )); if (!strcmp("",vocab_filename)) quit(-1,"Error : Must specify a vocabulary file.\n"); if (!strcmp("",idngram_filename)) quit(-1,"text2idngram : Error : Must specify idngram file.\n"); if (pc_flagarg(&argc,argv,"-compress")) temp_file_ext = salloc(".Z"); else { if (pc_flagarg(&argc,argv,"-gzip")) temp_file_ext = salloc(".gz"); else temp_file_ext = salloc(""); } strcpy(temp_directory, "cmuclmtk-XXXXXX"); if (mkdtemp(temp_directory) == NULL) { quit(-1, "Failed to create temporary folder: %s\n", strerror(errno)); } pc_report_unk_args(&argc,argv,verbosity); outfile = 
rr_fopen(idngram_filename,"wb"); pc_message(verbosity,2,"Vocab : %s\n",vocab_filename); pc_message(verbosity,2,"Output idngram : %s\n",idngram_filename); pc_message(verbosity,2,"Buffer size : %d\n",buffer_size); pc_message(verbosity,2,"Hash table size : %d\n",hash_size); pc_message(verbosity,2,"Max open files : %d\n",max_files); pc_message(verbosity,2,"n : %d\n",n); pc_message(verbosity,2,"FOF size : %d\n",fof_size); size_of_rec = (sizeof(wordid_t) * n) + 16 - (( n* sizeof(wordid_t)) % 16); buffer_size *= (1000000/((sizeof(ngram_rec) + size_of_rec))); fprintf(stderr,"buffer size = %d\n",buffer_size); /* Allocate memory for hash table */ fprintf(stderr,"Initialising hash table...\n"); M = nearest_prime(hash_size); new_idngram_hashtable(&vocabulary,M); /* Read in the vocabulary */ vocab_size = 0; vocab_file = rr_iopen(vocab_filename); pc_message(verbosity,2,"Reading vocabulary...\n"); while (fgets (temp_word, sizeof(temp_word),vocab_file)) { if (strncmp(temp_word,"##",2)==0) continue; sscanf (temp_word, "%s ",temp_word2); /* Check for vocabulary order */ if (vocab_size > 0 && strcmp(temp_word2,temp_word3)<0) quit(-1,"wngram2idngram : Error : Vocabulary is not alphabetically ordered.\n"); /* Check for repeated words in the vocabulary */ if (index2(&vocabulary,temp_word2) != 0) warn_on_repeated_words(temp_word); warn_on_wrong_vocab_comments(temp_word); vocab_size++; add_to_idngram_hashtable(&vocabulary,idngram_hash(temp_word2,M),temp_word2,vocab_size); strcpy(temp_word3,temp_word2); } if (vocab_size > MAX_VOCAB_SIZE) quit(-1,"Error : Vocabulary size exceeds maximum.\n"); pc_message(verbosity,2,"Allocating memory for the buffer...\n"); buffer=(ngram_rec *) rr_malloc((buffer_size+1)*sizeof(ngram_rec)); for (i=0;i<=buffer_size;i++) buffer[i].word = (wordid_t *) rr_malloc(n*sizeof(wordid_t)); /* Open the "non-OOV" tempfile */ sprintf(temp_word, "%s/1%s", temp_directory, temp_file_ext); non_unk_fp = rr_fopen(temp_word,"w"); pc_message(verbosity,2,"Writing non-OOV counts to 
temporary file %s\n", temp_word); number_of_tempfiles = 1; current_ngram = (wordid_t *) rr_malloc(n*sizeof(wordid_t)); sort_ngram = (wordid_t *) rr_malloc(n*sizeof(wordid_t)); /* Read text into buffer */ position_in_buffer = 0; while (!rr_feof(stdin)) { for (i=0;i<=n-1;i++) { get_word(stdin,temp_word); current_ngram[i]=index2(&vocabulary,temp_word); } if (scanf("%d",¤t_count) != 1) if (!rr_feof(stdin)) quit(-1,"Error reading n-gram count from stdin.\n"); if (!rr_feof(stdin)) { contains_unks = 0; for (i=0;i<=n-1;i++) { if (!current_ngram[i]) contains_unks = 1; } if (contains_unks) { /* Write to buffer */ position_in_buffer++; if (position_in_buffer >= buffer_size) { /* Sort buffer */ pc_message(verbosity,2, "Sorting n-grams which include an OOV word...\n"); qsort((void*) buffer,(size_t) position_in_buffer, sizeof(ngram_rec),compare_ngrams2); pc_message(verbosity,2,"Done.\n"); /* Write buffer to temporary file */ number_of_tempfiles++; sprintf(temp_word,"%s/%hu%s", temp_directory, number_of_tempfiles,temp_file_ext); pc_message(verbosity,2, "Writing sorted OOV-counts buffer to temporary file %s\n", temp_word); tempfile = rr_fopen(temp_word,"w"); for (i=0;i<=n-1;i++) sort_ngram[i] = buffer[0].word[i]; sort_count = buffer[0].count; for (i=0;i<=position_in_buffer-2;i++) { same_ngram = 1; for (j=n-1;j>=0;j--) { if (buffer[i].word[j] != sort_ngram[j]) { same_ngram = 0; j = -1; } } if (same_ngram) sort_count += buffer[i].count; else { for (j=0;j<=n-1;j++) { rr_fwrite((char*)&sort_ngram[j],sizeof(wordid_t),1, tempfile,"temporary n-gram ids"); sort_ngram[j] = buffer[i].word[j]; } rr_fwrite((char*)&sort_count,sizeof(int),1,tempfile, "temporary n-gram counts"); sort_count = buffer[i].count; } } for (j=0;j<=n-1;j++) rr_fwrite((char*)&sort_ngram[j],sizeof(wordid_t),1, tempfile,"temporary n-gram ids"); rr_fwrite((char*)&sort_count,sizeof(int),1,tempfile, "temporary n-gram counts"); rr_oclose(tempfile); position_in_buffer = 1; } for (i=0;i<=n-1;i++) 
buffer[position_in_buffer-1].word[i] = current_ngram[i]; buffer[position_in_buffer-1].count = current_count; }else { /* Write to temporary file */ for (i=0;i<=n-1;i++) rr_fwrite((char*)¤t_ngram[i],sizeof(wordid_t),1, non_unk_fp,"temporary n-gram ids"); rr_fwrite((char*)¤t_count,sizeof(int),1,non_unk_fp, "temporary n-gram counts"); } } } if (position_in_buffer > 0) { /* Only do this bit if we have actually seen some OOVs */ /* Sort final buffer */ pc_message(verbosity,2,"Sorting final buffer...\n"); qsort((void*) buffer,(size_t) position_in_buffer, sizeof(ngram_rec),compare_ngrams2); /* Write final buffer */ number_of_tempfiles++; sprintf(temp_word,"%s/%hu%s", temp_directory, number_of_tempfiles,temp_file_ext); pc_message(verbosity,2,"Writing sorted buffer to temporary file %s\n", temp_word); tempfile = rr_fopen(temp_word,"w"); for (i=0;i<=n-1;i++) sort_ngram[i] = buffer[0].word[i]; sort_count = buffer[0].count; for (i=1;i<=position_in_buffer-1;i++) { same_ngram = 1; for (j=n-1;j>=0;j--) { if (buffer[i].word[j] != sort_ngram[j]) { same_ngram = 0; j = -1; } } if (same_ngram) sort_count += buffer[i].count; else { for (j=0;j<=n-1;j++) { rr_fwrite((char*)&sort_ngram[j],sizeof(wordid_t),1, tempfile,"temporary n-gram ids"); sort_ngram[j] = buffer[i].word[j]; } rr_fwrite((char*)&sort_count,sizeof(int),1,tempfile, "temporary n-gram counts"); sort_count = buffer[i].count; } } for (j=0;j<=n-1;j++) rr_fwrite((char*)&sort_ngram[j],sizeof(wordid_t),1, tempfile,"temporary n-gram ids"); rr_fwrite((char*)&sort_count,sizeof(int),1,tempfile, "temporary n-gram counts"); fclose(tempfile); } /* Merge the temporary files, and output the result */ fclose(non_unk_fp); pc_message(verbosity,2,"Merging temporary files...\n"); merge_idngramfiles(1, number_of_tempfiles, temp_directory, temp_file_ext, max_files, outfile, write_ascii, fof_size, n); fclose(outfile); rmdir(temp_directory); pc_message(verbosity,0,"wngram2idngram : Done.\n"); return 0; }
void main(int argc, char *argv[]) { int verbosity; int n; int m; int i; int input_type; int storage_type; unsigned short *current_ngram_int; unsigned short *previous_ngram_int; char **current_ngram_text; char **previous_ngram_text; int current_count; int running_total; flag same; flag first_one; flag got_to_eof; running_total = 0; report_version(&argc,argv); if (pc_flagarg( &argc, argv,"-help") || argc==1) { fprintf(stderr,"ngram2mgram - Convert an n-gram file to an m-gram file, where m<n\n"); fprintf(stderr,"Usage : ngram2mgram -n N -m M\n"); fprintf(stderr," [ -binary | -ascii | -words ]\n"); fprintf(stderr," < .ngram > .mgram\n"); exit(1); } n = pc_intarg( &argc, argv,"-n",0); m = pc_intarg( &argc, argv,"-m",0); verbosity = pc_intarg(&argc,argv,"-verbosity",DEFAULT_VERBOSITY); input_type = 0; if (pc_flagarg( &argc, argv,"-binary")) { input_type = BINARY; } if (pc_flagarg( &argc, argv,"-ascii")) { if (input_type != 0) { quit(-1,"Error : more than one file format specified.\n"); } input_type = ASCII; } if (pc_flagarg( &argc, argv,"-words")) { if (input_type != 0) { quit(-1,"Error : more than one file format specified.\n"); } input_type = WORDS; } if (input_type == 0) { pc_message(verbosity,2,"Warning : no input type specified. Defaulting to binary.\n"); input_type = BINARY; } if (n == 0) { quit(-1,"Must specify a value for n. Use the -n switch.\n"); } if (m == 0) { quit(-1,"Must specify a value for m. Use the -m switch.\n"); } if (n<=m) { quit(-1,"n must be greater than m.\n"); } pc_report_unk_args(&argc,argv,verbosity); if (input_type == BINARY || input_type == ASCII) { storage_type = NUMERIC; } else { storage_type = ALPHA; } if (storage_type == NUMERIC) { current_ngram_int = (unsigned short *) rr_malloc(n*sizeof(unsigned short)); previous_ngram_int = (unsigned short *) rr_malloc(n*sizeof(unsigned short)); /* And to prevent compiler warnings ... 
*/ current_ngram_text = NULL; previous_ngram_text = NULL; } else { current_ngram_text = (char **) rr_malloc(n*sizeof(char *)); previous_ngram_text = (char **) rr_malloc(n*sizeof(char *)); for (i=0;i<=n-1;i++) { current_ngram_text[i] = (char *) rr_malloc(MAX_WORD_LENGTH*sizeof(char)); previous_ngram_text[i] = (char *) rr_malloc(MAX_WORD_LENGTH*sizeof(char)); } /* And to prevent compiler warnings ... */ current_ngram_int = NULL; previous_ngram_int = NULL; } got_to_eof = 0; first_one = 1; while (!rr_feof(stdin)) { /* Store previous n-gram */ if (!first_one) { if (storage_type == NUMERIC) { for (i=0;i<=n-1;i++) { previous_ngram_int[i] = current_ngram_int[i]; } } else { for (i=0;i<=n-1;i++) { strcpy(previous_ngram_text[i],current_ngram_text[i]); } } } /* Read new n-gram */ switch(input_type) { case BINARY: for (i=0;i<=n-1;i++) { rr_fread(¤t_ngram_int[i],sizeof(id__t),1,stdin, "from id_ngrams at stdin",0); } rr_fread(¤t_count,sizeof(count_t),1,stdin, "from id_ngrams file at stdin",0); break; case ASCII: for (i=0;i<=n-1;i++) { if (fscanf(stdin,"%hu",¤t_ngram_int[i]) != 1) { if (!rr_feof(stdin)) { quit(-1,"Error reading id_ngram.\n"); } else { got_to_eof = 1; } } } if (fscanf(stdin,"%d",¤t_count) != 1) { if (!rr_feof(stdin)) { quit(-1,"Error reading id_ngram.\n"); } else { got_to_eof = 1; } } break; case WORDS: for (i=0;i<=n-1;i++) { if (fscanf(stdin,"%s",current_ngram_text[i]) != 1) { if (!rr_feof(stdin)) { quit(-1,"Error reading id_ngram.\n"); } else { got_to_eof = 1; } } } if (fscanf(stdin,"%d",¤t_count) != 1) { if (!rr_feof(stdin)) { quit(-1,"Error reading id_ngram.\n"); } else { got_to_eof = 1; } } break; } if (!got_to_eof) { /* Check for correct sorting */ if (!first_one) { switch(storage_type) { case NUMERIC: for (i=0;i<=n-1;i++) { if (current_ngram_int[i]<previous_ngram_int[i]) { quit(-1,"Error : ngrams not correctly sorted.\n"); } else { if (current_ngram_int[i]>previous_ngram_int[i]) { i=n; } } } break; case ALPHA: for (i=0;i<=n-1;i++) { if 
(strcmp(current_ngram_text[i],previous_ngram_text[i])<0) { quit(-1,"Error : ngrams not correctly sorted.\n"); } else { if (strcmp(current_ngram_text[i],previous_ngram_text[i])>0) { i=n; } } } break; } } /* Compare this m-gram with previous m-gram */ if (!first_one) { switch(storage_type) { case NUMERIC: same = 1; for (i=0;i<=m-1;i++) { if (current_ngram_int[i] != previous_ngram_int[i]) { same = 0; } } if (same) { running_total += current_count; } else { if (input_type == ASCII) { for (i=0;i<=m-1;i++) { printf("%d ",previous_ngram_int[i]); } printf("%d\n",running_total); } else { for (i=0;i<=m-1;i++) { rr_fwrite(&previous_ngram_int[i],sizeof(id__t),1,stdout, "to id_ngrams at stdout"); } rr_fwrite(&running_total,sizeof(count_t),1,stdout, "to id n-grams at stdout"); } running_total = current_count; } break; case ALPHA: same = 1; for (i=0;i<=m-1;i++) { if (strcmp(current_ngram_text[i],previous_ngram_text[i])) { same = 0; } } if (same) { running_total += current_count; } else { for (i=0;i<=m-1;i++) { printf("%s ",previous_ngram_text[i]); } printf("%d\n",running_total); running_total = current_count; } break; } } else { running_total = current_count; } first_one = 0; } } /* Write out final m-gram */ switch(input_type) { case BINARY: break; case ASCII: for (i=0;i<=m-1;i++) { printf("%d ",previous_ngram_int[i]); } printf("%d\n",running_total); break; case WORDS: for (i=0;i<=m-1;i++) { printf("%s ",previous_ngram_text[i]); } printf("%d\n",running_total); break; } pc_message(verbosity,0,"ngram2mgram : Done.\n"); exit(0); }
void parse_args(int argc, char** argv, struct prefs* v) { gboolean in_loop = TRUE; struct option long_options[] = { { "font-size-modifier", 1, NULL, 'z' }, { "black", 1, NULL, '1' }, { "red", 1, NULL, '2' }, { "green", 1, NULL, '3' }, { "yellow", 1, NULL, '4' }, { "blue", 1, NULL, '5' }, { "magenta", 1, NULL, '6' }, { "cyan", 1, NULL, '7' }, { "white", 1, NULL, '8' }, { "jump-resize", 2, NULL, 'j' }, { "file-icons", 2, NULL, 'i' }, { "version", 0, NULL, 'V' }, }; GdkColor color_temp; optind = 0; while (in_loop) { switch (fgetopt_long(argc, argv, "j::z:i::vV", long_options, NULL)) { case -1: in_loop = FALSE; break; /* Font size modifier */ case 'z': v->font_size_modifier = CLAMP(atoi(optarg), -10, 10); break; /* Enable or disable icons */ case 'i': if (!optarg || STREQ(optarg, "on")) v->show_icons = TRUE; else if (STREQ(optarg, "off")) v->show_icons = FALSE; break; /* Enable or disable jump-resize */ case 'j': if (!optarg || STREQ(optarg, "on")) v->jump_resize = TRUE; else if (STREQ(optarg, "off")) v->jump_resize = FALSE; break; /* Colours */ case '1': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_BLACK, &color_temp); break; case '2': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_RED, &color_temp); break; case '3': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_GREEN, &color_temp); break; case '4': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_YELLOW, &color_temp); break; case '5': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_BLUE, &color_temp); break; case '6': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_MAGENTA, &color_temp); break; case '7': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_CYAN, &color_temp); break; case '8': if (gdk_color_parse(optarg, &color_temp)) set_color(TCC_WHITE, &color_temp); break; case 'v': case 'V': report_version(); break; case ':': g_warning("Option missing argument"); /*exit(EXIT_FAILURE);*/ break; case '?': default: g_warning("Unknown option provided"); 
/*exit(EXIT_FAILURE);*/ break; } } }
static int handle_options(const unsigned char *opt) { const unsigned char *n = NULL, *p; if (!strcmp("--version", opt)) { report_version(); } else if (!strcmp("--help", opt)) { report_help(); } else if ((p = check_option("--stdin", opt))) { stdin_file = p; } else if ((p = check_option("--stdout", opt))) { stdout_file = p; } else if ((p = check_option("--stderr", opt))) { stderr_file = p; } else if ((p = check_option("--workdir", opt))) { working_dir = p; } else if ((p = check_option("--test-file", opt))) { test_file = p; } else if ((p = check_option("--corr-file", opt))) { corr_file = p; } else if ((p = check_option("--info-file", opt))) { info_file = p; } else if ((p = check_option("--input-file", opt))) { input_file = p; } else if ((p = check_option("--output-file", opt))) { output_file = p; } else if (!strcmp("--clear-env", opt)) { clear_env_flag = 1; } else if ((p = check_option("--env", opt))) { xexpand(&env_vars); env_vars.v[env_vars.u++] = xstrdup(p); } else if ((p = check_option((n = "--time-limit"), opt))) { parse_int(n, p, &time_limit, 1, 99999); } else if ((p = check_option((n = "--time-limit-millis"), opt))) { parse_int(n, p, &time_limit_millis, 1, 999999999); } else if ((p = check_option((n = "--real-time-limit"), opt))) { parse_int(n, p, &real_time_limit, 1, 99999); } else if (!strcmp("--no-core-dump", opt)) { no_core_dump = 1; } else if ((p = check_option("--kill-signal", opt))) { kill_signal = p; } else if (!strcmp("--memory-limit", opt)) { memory_limit = 1; } else if (!strcmp("--secure-exec", opt)) { secure_exec = 1; } else if (!strcmp("--security-violation", opt)) { security_violation = 1; } else if (!strcmp("--use-stdin", opt)) { use_stdin = 1; } else if (!strcmp("--use-stdout", opt)) { use_stdout = 1; } else if ((p = check_option((n = "--max-vm-size"), opt))) { parse_size(n, p, &max_vm_size, 4096); } else if ((p = check_option((n = "--max-stack-size"), opt))) { parse_size(n, p, &max_stack_size, 4096); } else if ((p = check_option((n = 
"--max-data-size"), opt))) { parse_size(n, p, &max_data_size, 4096); } else if ((p = check_option((n = "--mode"), opt))) { parse_mode(n, p, &mode); } else if ((p = check_option((n = "--group"), opt))) { parse_group(n, p, &group); } else if ((p = check_option((n = "--test-num"), opt))) { parse_int(n, p, &test_num, 1, 99999); } else if ((p = check_option("--test-pattern", opt))) { test_pattern = p; } else if ((p = check_option("--corr-pattern", opt))) { corr_pattern = p; } else if ((p = check_option("--info-pattern", opt))) { info_pattern = p; } else if ((p = check_option("--tgzdir-pattern", opt))) { tgzdir_pattern = p; } else if (!strcmp("--update-corr", opt)) { update_corr = 1; } else if ((p = check_option("--test-dir", opt))) { test_dir = p; } else if (!strcmp("--all-tests", opt)) { all_tests = 1; } else if (!strcmp("--quiet", opt)) { quiet_flag = 1; } else if (!strcmp("--", opt)) { return 1; } else if (!strncmp("--", opt, 2)) { fatal("invalid option %s"); } else { return 2; } return 0; }
void main (int argc, char **argv) { ng_t ng; arpa_lm_t arpa_ng; char input_string[500]; int num_of_args; char *args[MAX_ARGS]; char *lm_filename_arpa; char *lm_filename_binary; flag told_to_quit; flag inconsistant_parameters; flag backoff_from_unk_inc; flag backoff_from_unk_exc; flag backoff_from_ccs_inc; flag backoff_from_ccs_exc; flag arpa_lm; flag binary_lm; flag include_unks; char *fb_list_filename; char *probs_stream_filename; char *annotation_filename; char *text_stream_filename; char *oov_filename; char *ccs_filename; double log_base; char wlist_entry[1024]; char current_cc[200]; int current_cc_id; FILE *context_cues_fp; int n; int generate_size = 10000; int random_seed; /* Process command line */ report_version(&argc,argv); if (pc_flagarg(&argc, argv,"-help") || argc == 1 || (strcmp(argv[1],"-binary") && strcmp(argv[1],"-arpa"))) { fprintf(stderr,"evallm : Evaluate a language model.\n"); fprintf(stderr,"Usage : evallm [ -binary .binlm | \n"); fprintf(stderr," -arpa .arpa [ -context .ccs ] ]\n"); exit(1); } lm_filename_arpa = salloc(pc_stringarg(&argc, argv,"-arpa","")); if (strcmp(lm_filename_arpa,"")) { arpa_lm = 1; } else { arpa_lm = 0; } lm_filename_binary = salloc(pc_stringarg(&argc, argv,"-binary","")); if (strcmp(lm_filename_binary,"")) { binary_lm = 1; } else { binary_lm = 0; } if (arpa_lm && binary_lm) { quit(-1,"Error : Can't use both -arpa and -binary flags.\n"); } if (!arpa_lm && !binary_lm) { quit(-1,"Error : Must specify either a binary or an arpa format language model.\n"); } ccs_filename = salloc(pc_stringarg(&argc, argv,"-context","")); if (binary_lm && strcmp(ccs_filename,"")) { fprintf(stderr,"Warning - context cues file not needed with binary language model file.\nWill ignore it.\n"); } pc_report_unk_args(&argc,argv,2); /* Load language model */ if (arpa_lm) { fprintf(stderr,"Reading in language model from file %s\n", lm_filename_arpa); load_arpa_lm(&arpa_ng,lm_filename_arpa); } else { fprintf(stderr,"Reading in language model from file 
%s\n", lm_filename_binary); load_lm(&ng,lm_filename_binary); } fprintf(stderr,"\nDone.\n"); if (!arpa_lm) { n=ng.n; } else { n=arpa_ng.n; } if (arpa_lm) { arpa_ng.context_cue = (flag *) rr_calloc(arpa_ng.table_sizes[0],sizeof(flag)); arpa_ng.no_of_ccs = 0; if (strcmp(ccs_filename,"")) { context_cues_fp = rr_iopen(ccs_filename); while (fgets (wlist_entry, sizeof (wlist_entry),context_cues_fp)) { if (strncmp(wlist_entry,"##",2)==0) continue; sscanf (wlist_entry, "%s ",current_cc); if (strncmp(wlist_entry,"#",1)==0) { fprintf(stderr,"\n\n===========================================================\n"); fprintf(stderr,":\nWARNING: line assumed NOT a comment:\n"); fprintf(stderr, ">>> %s <<<\n",wlist_entry); fprintf(stderr, " '%s' will be included in the context cues list\n",current_cc); fprintf(stderr, " (comments must start with '##')\n"); fprintf(stderr,"===========================================================\n\n"); } if (sih_lookup(arpa_ng.vocab_ht,current_cc,¤t_cc_id) == 0) { quit(-1,"Error : %s in the context cues file does not appear in the vocabulary.\n",current_cc); } arpa_ng.context_cue[(unsigned short) current_cc_id] = 1; arpa_ng.no_of_ccs++; fprintf(stderr,"Context cue word : %s id = %d\n",current_cc,current_cc_id); } rr_iclose(context_cues_fp); } } /* Process commands */ told_to_quit = 0; num_of_args = 0; while (!feof(stdin) && !told_to_quit) { printf("evallm : "); gets(input_string); if (!feof(stdin)) { parse_comline(input_string,&num_of_args,args); random_seed = pc_intarg(&num_of_args,args,"-seed",-1); generate_size = pc_intarg(&num_of_args,args,"-size",10000); log_base = pc_doublearg(&num_of_args,args,"-log_base",10.0); backoff_from_unk_inc = pc_flagarg(&num_of_args,args, "-backoff_from_unk_inc"); backoff_from_ccs_inc = pc_flagarg(&num_of_args,args, "-backoff_from_ccs_inc"); backoff_from_unk_exc = pc_flagarg(&num_of_args,args, "-backoff_from_unk_exc"); backoff_from_ccs_exc = pc_flagarg(&num_of_args,args, "-backoff_from_ccs_exc"); include_unks = 
pc_flagarg(&num_of_args,args,"-include_unks"); fb_list_filename = salloc(pc_stringarg(&num_of_args,args, "-backoff_from_list","")); text_stream_filename = salloc(pc_stringarg(&num_of_args,args,"-text","")); probs_stream_filename = salloc(pc_stringarg(&num_of_args,args,"-probs","")); annotation_filename = salloc(pc_stringarg(&num_of_args,args,"-annotate","")); oov_filename = salloc(pc_stringarg(&num_of_args,args,"-oovs","")); inconsistant_parameters = 0; if (backoff_from_unk_inc && backoff_from_unk_exc) { fprintf(stderr,"Error : Cannot specify both exclusive and inclusive forced backoff.\n"); fprintf(stderr,"Use only one of -backoff_from_unk_exc and -backoff_from_unk_inc\n"); inconsistant_parameters = 1; } if (backoff_from_ccs_inc && backoff_from_ccs_exc) { fprintf(stderr,"Error : Cannot specify both exclusive and inclusive forced backoff.\n"); fprintf(stderr,"Use only one of -backoff_from_ccs_exc and -backoff_from_ccs_inc\n"); inconsistant_parameters = 1; } if (num_of_args > 0) { if (!inconsistant_parameters) { if (!strcmp(args[0],"perplexity")) { compute_perplexity(&ng, &arpa_ng, text_stream_filename, probs_stream_filename, annotation_filename, oov_filename, fb_list_filename, backoff_from_unk_inc, backoff_from_unk_exc, backoff_from_ccs_inc, backoff_from_ccs_exc, arpa_lm, include_unks, log_base); } else { if (!strcmp(args[0],"validate")) { if (num_of_args != n) { fprintf(stderr,"Error : must specify %d words of context.\n", n-1); } else { /* Assume last n-1 parameters form context */ validate(&ng, &arpa_ng, &(args[num_of_args-n+1]), backoff_from_unk_inc, backoff_from_unk_exc, backoff_from_ccs_inc, backoff_from_ccs_exc, arpa_lm, fb_list_filename); } } else { if (!strcmp(args[0],"stats")) { if (arpa_lm) { display_arpa_stats(&arpa_ng); } else { display_stats(&ng); } } else { if (!strcmp(args[0],"quit")) { told_to_quit=1; } else if (!strcmp(args[0],"generate")) { if(arpa_lm) generate_words(NULL,&arpa_ng,generate_size,random_seed,text_stream_filename); else 
generate_words(&ng,NULL,generate_size,random_seed,text_stream_filename); } else { if (!strcmp(args[0],"help")) { printf("The user may specify one of the following commands: \n"); printf("\n"); printf(" - perplexity\n"); printf("\n"); printf("Computes the perplexity of a given text. May optionally specify words\n"); printf("from which to force back-off.\n"); printf("\n"); printf("Syntax: \n"); printf("\n"); printf("perplexity -text .text\n"); printf(" [ -probs .fprobs ]\n"); printf(" [ -oovs .oov_file ]\n"); printf(" [ -annotate .annotation_file ] \n"); printf(" [ -backoff_from_unk_inc | -backoff_from_unk_exc ]\n"); printf(" [ -backoff_from_ccs_inc | -backoff_from_ccs_exc ] \n"); printf(" [ -backoff_from_list .fblist ]\n"); printf(" [ -include_unks ]\n"); printf("\n"); printf(" - validate\n"); printf(" \n"); printf("Calculate the sum of the probabilities of all the words in the\n"); printf("vocabulary given the context specified by the user.\n"); printf("\n"); printf("Syntax: \n"); printf("\n"); printf("validate [ -backoff_from_unk -backoff_from_ccs |\n"); printf(" -backoff_from_list .fblist ]\n"); printf(" [ -forced_backoff_inc | -forced_back_off_exc ] \n"); printf(" word1 word2 ... word_(n-1)\n"); printf("\n"); printf("Where n is the n in n-gram. \n"); printf("\n"); printf(" - help\n"); printf("\n"); printf("Displays this help message.\n"); printf("\n"); printf("Syntax: \n"); printf("\n"); printf("help\n"); printf("\n"); printf(" - quit\n"); printf("\n"); printf("Exits the program.\n"); printf("\n"); printf("Syntax: \n"); printf("\n"); printf("quit\n"); } else { fprintf(stderr,"Unknown command : %s\nType \'help\'\n", args[0]); } } } } } } } } } fprintf(stderr,"evallm : Done.\n"); exit(0); }
int main(int argc, char **argv) { int i,j; ng_t* ng; int verbosity; int mem_alloc_method; /* Method used to decide how much memory to allocate for count tables */ int buffer_size; flag is_ascii; ngram current_ngram; ngram previous_ngram; count_t *ng_count; /* Array indicating the number of occurrances of the current 1-gram, 2-gram, ... ,n-gram Size depends on #define in general.h */ int nlines; int pos_of_novelty; int prev_id1; flag contains_unks; int mem_alloced; flag displayed_oov_warning; /** Display OOV warning */ /* ------------------ Process command line --------------------- */ report_version(&argc,argv); if (argc == 1 || pc_flagarg(&argc, argv,"-help")) { /* Display help message */ help_message(); exit(1); } verbosity = pc_intarg(&argc, argv,"-verbosity",DEFAULT_VERBOSITY); /* Initialization */ { ng=init_ng( &argc, argv, verbosity ); mem_alloc_method = init_alloc_method(ng, &argc, argv, &buffer_size); if (!strcmp(ng->id_gram_filename,"-") && mem_alloc_method == TWO_PASSES) quit(-1,"Error: If idngram is read from stdin, then cannot use -calc_mem option.\n"); is_ascii = set_lmformat(pc_flagarg(&argc,argv,"-ascii_input"), pc_flagarg(&argc,argv,"-bin_input"), ng); /* Report parameters */ report_param(verbosity,ng, is_ascii, mem_alloc_method, buffer_size); pc_report_unk_args(&argc,argv,verbosity); } /* --------------- Read in the vocabulary -------------- */ read_vocab(ng,verbosity); /* --------------- Allocate space for the table_size array --------- */ init_ng_table_size(ng, mem_alloc_method, is_ascii, verbosity, buffer_size ); /* ----------- Allocate memory for tree structure -------------- */ ng->count = NULL; ng->count4 = NULL; ng->marg_counts = NULL; ng->marg_counts4 = NULL; ng->count_table = NULL; ng->count = (count_ind_t **) rr_malloc(sizeof(count_ind_t *)*ng->n); ng->count4 = (count_t **) rr_malloc(sizeof(count_t *)*ng->n); ng->count_table = (count_t **) rr_malloc(sizeof(count_t *)*ng->n); if (ng->four_byte_counts) { ng->marg_counts4 = (count_t *) 
rr_calloc(sizeof(count_t), ng->table_sizes[0]); }else { for (i=0;i<=ng->n-1;i++) ng->count_table[i] = (count_t *) rr_calloc(ng->count_table_size+1, sizeof(count_t)); ng->marg_counts = (count_ind_t *) rr_calloc(sizeof(count_ind_t),ng->table_sizes[0]); fprintf(stderr, "table_size %d\n",ng->table_sizes[0]); fflush(stderr); } ng->word_id = (id__t **) rr_malloc(sizeof(id__t *)*ng->n); if (ng->four_byte_alphas) { ng->bo_weight4 = (four_byte_t **) rr_malloc(sizeof(four_byte_t *)*ng->n); ng->bo_weight4[0] = (four_byte_t *) rr_malloc(sizeof(four_byte_t)* ng->table_sizes[0]); }else { ng->bo_weight = (bo_weight_t **) rr_malloc(sizeof(bo_weight_t *)*ng->n); ng->bo_weight[0] = (bo_weight_t *) rr_malloc(sizeof(bo_weight_t)* ng->table_sizes[0]); } ng->ind = (index__t **) rr_malloc(sizeof(index__t *)*ng->n); /* First table */ if (ng->four_byte_counts) ng->count4[0] = (count_t *) rr_calloc(ng->table_sizes[0],sizeof(count_t)); else ng->count[0] = (count_ind_t *) rr_calloc(ng->table_sizes[0],sizeof(count_ind_t)); ng->uni_probs = (uni_probs_t *) rr_malloc(sizeof(uni_probs_t)* ng->table_sizes[0]); ng->uni_log_probs = (uni_probs_t *) rr_malloc(sizeof(uni_probs_t)* ng->table_sizes[0]); if (ng->n >=2) ng->ind[0] = (index__t *) rr_calloc(ng->table_sizes[0],sizeof(index__t)); for (i=1;i<=ng->n-2;i++) { ng->word_id[i] = (id__t *) rr_malloc(sizeof(id__t)*ng->table_sizes[i]); if (ng->four_byte_counts) ng->count4[i] = (count_t *) rr_malloc(sizeof(count_t)*ng->table_sizes[i]); else ng->count[i] = (count_ind_t *) rr_malloc(sizeof(count_ind_t)*ng->table_sizes[i]); if (ng->four_byte_alphas) ng->bo_weight4[i] = (four_byte_t *) rr_malloc(sizeof(four_byte_t)*ng->table_sizes[i]); else ng->bo_weight[i] = (bo_weight_t *) rr_malloc(sizeof(bo_weight_t)*ng->table_sizes[i]); ng->ind[i] = (index__t *) rr_malloc(sizeof(index__t)*ng->table_sizes[i]); mem_alloced = sizeof(count_ind_t) + sizeof(bo_weight_t) + sizeof(index__t) + sizeof(id__t); if (ng->four_byte_alphas) mem_alloced += 4; mem_alloced *= 
ng->table_sizes[i]; pc_message(verbosity,2,"Allocated %d bytes to table for %d-grams.\n", mem_alloced,i+1); } ng->word_id[ng->n-1] = (id__t *) rr_malloc(sizeof(id__t)*ng->table_sizes[ng->n-1]); if (ng->four_byte_counts) ng->count4[ng->n-1] = (count_t *) rr_malloc(sizeof(count_t)*ng->table_sizes[ng->n-1]); else ng->count[ng->n-1] = (count_ind_t *) rr_malloc(sizeof(count_ind_t)*ng->table_sizes[ng->n-1]); pc_message(verbosity,2,"Allocated (%d+%d) bytes to table for %d-grams.\n", ng->four_byte_counts?sizeof(count_t):sizeof(count_ind_t), sizeof(id__t)*ng->table_sizes[ng->n-1],ng->n); /* Allocate memory for table for first-byte of indices */ ng_allocate_ptr_table(ng,NULL,0); /* Allocate memory for alpha array */ ng->alpha_array = (double *) rr_malloc(sizeof(double)*ng->out_of_range_alphas); ng->size_of_alpha_array = 0; /* Allocate memory for frequency of frequency information */ ng->freq_of_freq = (fof_t **) rr_malloc(sizeof(fof_t *)*ng->n); NG_DISC_METH(ng)->allocate_freq_of_freq(ng); /* Read n-grams into the tree */ pc_message(verbosity,2,"Processing id n-gram file.\n"); pc_message(verbosity,2,"20,000 n-grams processed for each \".\", 1,000,000 for each line.\n"); /* Allocate space for ngrams id arrays */ current_ngram.id_array = (id__t *) rr_calloc(ng->n,sizeof(id__t)); previous_ngram.id_array = (id__t *) rr_calloc(ng->n,sizeof(id__t)); current_ngram.n = ng->n; previous_ngram.n = ng->n; ng->num_kgrams = (ngram_sz_t *) rr_calloc(ng->n,sizeof(ngram_sz_t)); ng_count = (count_t *) rr_calloc(ng->n,sizeof(count_t)); nlines = 1; ng->n_unigrams = 0; /* Process first n-gram */ get_ngram(ng->id_gram_fp,¤t_ngram,is_ascii); contains_unks = ngram_chk_contains_unks(¤t_ngram,ng->n); /* Skip over any unknown words. They will come first, because <UNK> always has a word ID of zero. */ while (ng->vocab_type == CLOSED_VOCAB && contains_unks){ /* Stop looking if there are no more N-Grams. Of course, this means training will fail, since there are no unigrams. 
*/ if (get_ngram(ng->id_gram_fp,¤t_ngram,is_ascii) == 0) break; contains_unks = ngram_chk_contains_unks(¤t_ngram,ng->n); } for (i=0;i<=ng->n-2;i++) { ng->ind[i][0] = new_index(0,ng->ptr_table[i],&(ng->ptr_table_size[i]),0); ng->word_id[i+1][0] = current_ngram.id_array[i+1]; ng->num_kgrams[i+1]++; ng_count[i] = current_ngram.count; } ng_count[0] = current_ngram.count; NG_DISC_METH(ng)->update_freq_of_freq(ng,ng->n-1,current_ngram.count); store_normal_count(ng,0,current_ngram.count,ng->n-1); if (current_ngram.count <= ng->cutoffs[ng->n-2]) ng->num_kgrams[ng->n-1]--; ngram_copy(&previous_ngram,¤t_ngram,ng->n); prev_id1 = current_ngram.id_array[0]; displayed_oov_warning = 0; while (!rr_feof(ng->id_gram_fp)) { if (get_ngram(ng->id_gram_fp,¤t_ngram,is_ascii)) { if (ng->vocab_type == CLOSED_VOCAB) contains_unks=ngram_chk_contains_unks(¤t_ngram,ng->n); if (!contains_unks || ng->vocab_type != CLOSED_VOCAB) { /* Test for where this ngram differs from last - do we have an out-of-order ngram? */ pos_of_novelty = ngram_find_pos_of_novelty(¤t_ngram,&previous_ngram,ng->n,nlines); nlines++; show_idngram_nlines(nlines, verbosity); /* Add new n-gram as soon as it is encountered */ /* If all of the positions 2,3,...,n of the n-gram are context cues then ignore the n-gram. */ if (ng->n > 1) { NG_DISC_METH(ng)->update_freq_of_freq(ng,ng->n-1,current_ngram.count); store_normal_count(ng,ng->num_kgrams[ng->n-1],current_ngram.count,ng->n-1); ng->word_id[ng->n-1][ng->num_kgrams[ng->n-1]] = current_ngram.id_array[ng->n-1]; ng->num_kgrams[ng->n-1]++; if (ng->num_kgrams[ng->n-1] >= ng->table_sizes[ng->n-1]) quit(-1,"\nMore than %d %d-grams needed to be stored. 
Rerun with a higher table size.\n",ng->table_sizes[ng->n-1],ng->n); } /* Deal with new 2,3,...,(n-1)-grams */ for (i=ng->n-2;i>=MAX(1,pos_of_novelty);i--) { NG_DISC_METH(ng)->update_freq_of_freq(ng,i,ng_count[i]); if (ng_count[i] <= ng->cutoffs[i-1]) ng->num_kgrams[i]--; else store_normal_count(ng,ng->num_kgrams[i]-1,ng_count[i],i); ng_count[i] = current_ngram.count; ng->word_id[i][ng->num_kgrams[i]] = current_ngram.id_array[i]; ng->ind[i][ng->num_kgrams[i]] = new_index(ng->num_kgrams[i+1]-1, ng->ptr_table[i], &(ng->ptr_table_size[i]), ng->num_kgrams[i]); ng->num_kgrams[i]++; if (ng->num_kgrams[i] >= ng->table_sizes[i]) quit(-1,"More than %d %d-grams needed to be stored. Rerun with a higher table size.\n",ng->table_sizes[i],i+1); } for (i=0;i<=pos_of_novelty-1;i++) ng_count[i] += current_ngram.count; /* Deal with new 1-grams */ if (pos_of_novelty == 0) { if (ng->n>1) { for (i = prev_id1 + 1; i <= current_ngram.id_array[0]; i++) { ng->ind[0][i] = new_index(ng->num_kgrams[1]-1, ng->ptr_table[0], &(ng->ptr_table_size[0]), i); } prev_id1 = current_ngram.id_array[0]; } NG_DISC_METH(ng)->update_freq_of_freq(ng,0,ng_count[0]); if (!ng->context_cue[previous_ngram.id_array[0]]) { ng->n_unigrams += ng_count[0]; store_normal_count(ng,previous_ngram.id_array[0],ng_count[0],0); } store_marginal_count(ng,previous_ngram.id_array[0],ng_count[0],0); ng_count[0] = current_ngram.count; } if (current_ngram.count <= ng->cutoffs[ng->n-2]) ng->num_kgrams[ng->n-1]--; ngram_copy(&previous_ngram,¤t_ngram,ng->n); }else { if (!displayed_oov_warning){ pc_message(verbosity,2,"Warning : id n-gram stream contains OOV's (n-grams will be ignored).\n"); displayed_oov_warning = 1; } } } } rr_iclose(ng->id_gram_fp); for (i=ng->n-2;i>=1;i--) { NG_DISC_METH(ng)->update_freq_of_freq(ng,i,ng_count[i]); if (ng_count[i] <= ng->cutoffs[i-1]) ng->num_kgrams[i]--; else store_normal_count(ng,ng->num_kgrams[i]-1,ng_count[i],i); } NG_DISC_METH(ng)->update_freq_of_freq(ng,0,ng_count[0]); if 
(!ng->context_cue[current_ngram.id_array[0]]) { ng->n_unigrams += ng_count[0]; store_normal_count(ng,current_ngram.id_array[0],ng_count[0],0); } store_marginal_count(ng,current_ngram.id_array[0],ng_count[0],0); if (ng->n>1) { for (i=current_ngram.id_array[0]+1;i<=ng->vocab_size;i++) ng->ind[0][i] = new_index(ng->num_kgrams[1], ng->ptr_table[0], &(ng->ptr_table_size[0]), current_ngram.id_array[0]); } /* The idngram reading is completed at this point */ pc_message(verbosity,2,"\n"); /* Impose a minimum unigram count, if required */ if (ng->min_unicount > 0) { int nchanged= 0; for (i=ng->first_id;i<=ng->vocab_size;i++) { if ((return_count(ng->four_byte_counts, ng->count_table[0], ng->count[0], ng->count4[0], i) < ng->min_unicount) && !ng->context_cue[i]) { /* There was a bug in V2's switch. Look at segment for ABSOLUTE */ NG_DISC_METH(ng)->reduce_ug_freq_of_freq(ng,i); ng->n_unigrams += (ng->min_unicount - ng->count[0][i]); store_normal_count(ng,i,ng->min_unicount,0); nchanged++; } } if (nchanged > 0) pc_message(verbosity,2, "Unigram counts of %d words were bumped up to %d.\n", nchanged,ng->min_unicount); } /* Count zeroton information for unigrams */ ng->freq_of_freq[0][0] = 0; for (i=ng->first_id;i<=ng->vocab_size;i++) { if (return_count(ng->four_byte_counts, ng->count_table[0], ng->count[0], ng->count4[0], i) == 0) { ng->freq_of_freq[0][0]++; } } if (ng->discounting_method == GOOD_TURING) { for (i=0;i<=ng->n-1;i++) for (j=1;j<=ng->fof_size[i];j++) pc_message(verbosity,3,"fof[%d][%d] = %d\n",i,j,ng->freq_of_freq[i][j]); } pc_message(verbosity,2,"Calculating discounted counts.\n"); NG_DISC_METH(ng)->compute_discount_aux(ng, verbosity); /* Smooth unigram distribution, to give some mass to zerotons */ compute_unigram(ng,verbosity); /* Increment Contexts if using Good-Turing discounting-> No need otherwise, since all values are discounted anyway. 
*/ if (ng->discounting_method == GOOD_TURING) { pc_message(verbosity,2,"Incrementing contexts...\n"); for (i=ng->n-1;i>=1;i--) increment_context(ng,i,verbosity); } /* Calculate back-off weights */ pc_message(verbosity,2,"Calculating back-off weights...\n"); for (i=1;i<=ng->n-1;i++) compute_back_off(ng,i,verbosity); if (!ng->four_byte_alphas) pc_message(verbosity,3,"Number of out of range alphas = %d\n", ng->size_of_alpha_array); /* Write out LM */ pc_message(verbosity,2,"Writing out language model...\n"); if (ng->write_arpa) write_arpa_lm(ng,verbosity); if (ng->write_bin) write_bin_lm(ng,verbosity); pc_message(verbosity,0,"idngram2lm : Done.\n"); return 0; }