/*
 * Advance `tokenizer` and return the next token.
 *
 * The token is carved out in place: the delimiter that ends it is overwritten
 * with '\0', so the returned pointer refers to the tokenizer's own buffer.
 *
 * When `is_ignore_empties` is set, runs of consecutive delimiters are skipped
 * and zero-length tokens are never returned.
 *
 * Returns NULL if the tokenizer has no buffer (`s` is unset).  Once the input
 * is exhausted the result of free_tokenizer() is returned (presumably NULL --
 * its definition is not visible here).
 */
const char* tokenize( tokenizer_t* tokenizer )
{
  if (!tokenizer->s)
    return NULL;

  for (;;)
  {
    /* The previous call consumed the final token. */
    if (tokenizer->next == NULL)
      return free_tokenizer( tokenizer );

    tokenizer->current = tokenizer->next;
    tokenizer->next    = strpbrk( tokenizer->current, tokenizer->delimiters );

    if (tokenizer->next == NULL)
    {
      /* No delimiter left: `current` is the last token, possibly empty. */
      if (tokenizer->is_ignore_empties && *tokenizer->current == '\0')
        return free_tokenizer( tokenizer );
      return tokenizer->current;
    }

    /* Terminate the token and step past the delimiter that ended it. */
    *tokenizer->next = '\0';
    tokenizer->next += 1;

    if (!tokenizer->is_ignore_empties)
      return tokenizer->current;

    /* Swallow any further delimiters that directly follow. */
    tokenizer->next += strspn( tokenizer->next, tokenizer->delimiters );

    if (*tokenizer->current != '\0')
      return tokenizer->current;

    /* Empty token while ignoring empties: go round again for the next one. */
  }
}
/*
 * Shut down the document loader.
 *
 * Calls the teardown routine of each component in a fixed sequence: the text
 * categorizer handle, the XML parser, the tokenizer, the stemmer and the word
 * normalizer.
 *
 * NOTE(review): presumably this mirrors an initialization routine defined
 * elsewhere and must not be called while documents are still being
 * processed -- confirm against the callers.
 */
void close_document_loader() {
  textcat_Done(TEXT_CAT_HANDLE);   /* release the text-categorizer handle */
  xmlCleanupParser();              /* XML parser cleanup */
  free_tokenizer();                /* tokenizer teardown (no-argument variant) */
  close_stemmer();
  close_word_normalizer();
}
/**
 * Entry point of gen_TF_IDF.
 *
 * Usage: ./gen_TF_IDF [source_dir/ [destination_dir/]]
 * Both directories default to "." when omitted; more than two arguments is a
 * usage error (exit status 1).
 *
 * Initializes the tokenizer (pointing it at the SENNA directory below
 * LLAMAPUN_ROOT_PATH) and the stemmer, walks source_dir with ftw() calling
 * process_file for every entry, then tears everything down again.
 *
 * @return 0 on success, 1 if the directory walk reported an error.
 */
int main(int argc, char *argv[]) {
  char *source_directory, *destination_directory;

  switch (argc) {
    case 0:
    case 1:
      source_directory = destination_directory = ".";
      break;
    case 2:
      source_directory = argv[1];
      destination_directory = ".";
      break;
    case 3:
      source_directory = argv[1];
      destination_directory = argv[2];
      break;
    default:
      fprintf(stderr, "Too many arguments: %d\nUsage: ./gen_TF_IDF source_dir/ destination_dir/\n",argc);
      exit(1);
  }
  /* NOTE(review): destination_directory is parsed but never read in this
   * function -- confirm whether process_file was meant to receive it. */
  (void) destination_directory;

  /* Build the SENNA path with a bounded write: LLAMAPUN_ROOT_PATH has no
   * guaranteed relation to FILENAME_BUFF_SIZE, so refuse to continue with a
   * truncated path instead of overflowing the buffer. */
  char senna_opt_path[FILENAME_BUFF_SIZE];
  int written = snprintf(senna_opt_path, sizeof senna_opt_path,
                         "%s/third-party/senna/", LLAMAPUN_ROOT_PATH);
  if (written < 0 || (size_t) written >= sizeof senna_opt_path) {
    fprintf(stderr, "SENNA path does not fit into %zu bytes\n", sizeof senna_opt_path);
    exit(1);
  }

  initialize_tokenizer(senna_opt_path);
  init_stemmer();

  /* ftw() returns -1 when the walk itself fails (or when the callback
   * returns -1); report that instead of silently exiting with success. */
  int exit_status = 0;
  if (ftw(source_directory, process_file, 1) == -1) {
    fprintf(stderr, "Failed to traverse directory: %s\n", source_directory);
    exit_status = 1;
  }

  close_stemmer();
  free_tokenizer();
  xmlCleanupParser();
  return exit_status;
}
/*
 * Split `in` on any character in `del` and hand back copies of the first two
 * non-empty tokens.
 *
 * in    string to split (not modified by this function)
 * del   set of delimiter characters
 * out1  receives a strdup()'d copy of the first token, or "" if there is none
 * out2  receives a strdup()'d copy of the second token, or "" if there is none
 *
 * Both results are heap-allocated and owned by the caller (free() them).
 * Tokens after the second are ignored.
 */
void c_split_2(const char *in, const char *del, char **out1, char **out2) {
  char **slots[2] = { out1, out2 };
  size_t filled = 0;
  const char *t;

  *out1 = NULL;
  *out2 = NULL;

  tokenizer_t tok = tokenizer( in, del, TOKENIZER_NO_EMPTIES );

  /* Only two tokens are ever used, so stop as soon as both slots are taken
   * instead of tokenizing the rest of the input.  Each token goes to the slot
   * matching its position, so a failed strdup() cannot shift the second token
   * into *out1. */
  while (filled < 2 && (t = tokenize(&tok)) != NULL) {
    *slots[filled] = strdup(t);
    filled++;
  }

  /* Callers always get a string back, even when a token is missing. */
  if (*out1 == NULL) *out1 = strdup("");
  if (*out2 == NULL) *out2 = strdup("");

  /* Releases the tokenizer whether or not it ran to exhaustion.
   * NOTE(review): assumes free_tokenizer() accepts a partially consumed
   * tokenizer -- its definition is not visible here. */
  free_tokenizer( &tok );
}
int main(int argc, char *argv[]){ jlist = create_job_list(); char *tok; TOKENIZER *tokenizer; int ispipe, redirect_out_flag, redirect_in_flag, not_exec_cmd, is_bg_flag; //read-in buffer char data[BUFFER_MAX]; char extra[1]; //linked list to add argument arrays to execvp: 1 for process 1, 2 for piped process list_elt *argv2_elt = NULL; list_elt *argv1_elt = NULL; message_q = NULL; int argv1size = 0; int argv2size = 0; int status; //set shell pgid sh_pgid = getpgid(0); //register sigaction for SIGCHLD, SIGINT, SIGTSTP struct sigaction handler_action; sigaction(SIGCHLD, NULL, &handler_action); //save previous action for SIGCHLD to handler_action //change what handler_action does handler_action.sa_flags |= SA_SIGINFO; handler_action.sa_flags |= SA_RESTART; handler_action.sa_sigaction = child_signal_handler; sigfillset(&handler_action.sa_mask); // sigaction setup sigaction(SIGCHLD, &handler_action, NULL); sigaction(SIGTSTP, &handler_action, NULL); sigaction(SIGINT, &handler_action, NULL); // igmore the sigint and sigtstp sigmals signal(SIGINT, SIG_IGN); //signal(SIGTSTP, SIG_IGN); while(1){ signal(SIGTTOU, SIG_IGN); //igmore sigttou for tcsetpgrp if (-1 == tcsetpgrp(STDIN_FILENO, sh_pgid)){ perror("tcsetpgrp read error"); } if(-1 == tcsetpgrp(STDOUT_FILENO, sh_pgid)){ perror("tcsetpgrp read error 2"); } signal(SIGTTOU, SIG_DFL); // un-ignmore sigttou print_list(message_q); // print the message_q while(message_q){ message_q = delete(message_q, message_q); //empty the message_q } write(STDOUT_FILENO, "kinda-sh# ", 10); //prompt //initialize all the flags before tokenizing argv1size = 0; argv2size = 0; is_bg_flag = 0; ispipe = 0; redirect_out_flag = 0; redirect_in_flag = 0; int new_std_in=0; // < int new_std_out = 0; // > char* fname_in; char* fname_out; int error_flag=0; not_exec_cmd = 0; fg_pid=0; int read_val; data[0] = '\0'; // null terminate data array read_val = read(0, data, BUFFER_MAX); // read into the data if(data[0] != '\n'){ //check if there is buffer overflow 
if((read_val == BUFFER_MAX) && (data[BUFFER_MAX - 1] != '\n')){ while(1){ argv2_elt = NULL; argv1_elt = NULL; read(0, extra, 1); // read 1 char at a time from the input stream until its empty (new line) if(extra[0] == '\n'){ //read in until the "return(\n) key is hit (similar to flush the std_in) break; } } } //error handling for inappropriate read size if(read_val < 0){ perror("Read"); //check for read error } if(read_val == 0){ kill(0, SIGKILL); } data[read_val-1] = '\0'; //null terminate dat tokenizer = init_tokenizer(data); //create tokenizer on data // start tokenizer loop while((tok = get_next_token(tokenizer)) != NULL){ if(is_bg_flag){ printf("& should be at the end of the command \n"); error_flag=1; break; } if( (*tok != '|') && (*tok != '&') && (*tok != '<') &&(*tok != '>') && (*tok != '\0')){ //check for token if(!ispipe){ if(redirect_out_flag || redirect_in_flag){ printf("invalid argument between redirection and pipe\n"); //cat < infile something | wc error_flag=1; break; } argv1_elt = add(argv1_elt, tok); // add tok to arv1 argv1size++; } else{ // after pipe - second process argv2size++; argv2_elt = add(argv2_elt, tok); } //if jobs command called if(str_equals(tok, "jobs")){ print_job_list(jlist); not_exec_cmd = 1; } // if fg command called if(str_equals(tok, "fg")){ job* new_bg_jb; char* num; num = get_next_token(tokenizer); int num_int = -1; not_exec_cmd = 1; int list_len; //if number argument is not specified, take the most recent background job if(num == NULL){ new_bg_jb = get_ith_job(jlist, 1); if(new_bg_jb == NULL){ printf("fg error: no job in job queue\n"); break; } } //take the num'th background job else{ num_int= my_atoi(num); // run atoi on input number (ie: fg 2) //reverse the number into the correct job order list_len = listlength(jlist); num_int = num_int - list_len -1; if(num_int < 0){ num_int = num_int * (-1); } new_bg_jb = get_ith_job(jlist, num_int); //get num_int job from the job list if(new_bg_jb == NULL){ printf("fg error: no [%s] 
job\n", num); free(num); break; } } //take the foreground job pid as the chosen bg pid fg_pid= new_bg_jb->pgid; message_q = add(message_q, "\n"); // add restarting prompt to message_q message_q = add(message_q, new_bg_jb->command); message_q = add(message_q, "Restarting: "); print_list(message_q); while(message_q){ message_q = delete(message_q, message_q); //empty the message_q } if(tcsetpgrp(STDIN_FILENO, fg_pid) == -1){ perror("tcsetpgrp error"); } //relay SIGCONT to the job, and mask it from all signals but SIGCHLD killpg(fg_pid, SIGCONT); sigset_t cont_mask; sigfillset(&cont_mask); sigdelset(&cont_mask, SIGCHLD); //now the shell should wait for the new foregrounded job to be finished while(fg_pid){ sigsuspend(&cont_mask); } } //take the specific background job to restart. If already running, do nothing. if(str_equals(tok, "bg")){ job* new_bg_jb; not_exec_cmd = 1; char* num; int num_int=-1; num = get_next_token(tokenizer); // if num is not specified, get the most recent background job (to restart it) if(num == NULL){ new_bg_jb = get_ith_job(jlist, 1); if(new_bg_jb == NULL){ printf("bg error: no job in job queue\n"); break; } } else{ num_int= my_atoi(num); // run atoi for bg //set up num_int to pass int list_len; list_len = listlength(jlist); num_int = num_int - list_len -1; if(num_int < 0){ num_int = num_int * (-1); } new_bg_jb = get_ith_job(jlist, num_int); // get ith job from job_list if(new_bg_jb == NULL){ printf("bg error: no [%s] job\n", num); free(num); break; } } if(!job_stopped(new_bg_jb)){ // tcsetpgrp(STDIN_FILENO, 0); printf("bg error: job is already running in the background\n"); break; } killpg(new_bg_jb->pgid, SIGCONT); tcsetpgrp(STDIN_FILENO, 0); } } else if(*tok=='>'){ // if redirect out token if(redirect_out_flag){ error_flag=1; printf("multiple stdout redirection is invalid\n"); //printf? 
break; } else{ fname_out = get_next_token(tokenizer); // get hte next token new_std_out = open(fname_out, O_WRONLY| O_TRUNC | O_CREAT, 0644); // open file if(new_std_out == -1){ perror("stdout Redir Error"); } redirect_out_flag=1; } } else if(*tok=='<'){ //if redirect in token if(ispipe){ printf("invalid stdin redirection after pipe\n"); error_flag=1; break; } else if(redirect_in_flag){ printf("multiple stdin redirection is invalid\n"); error_flag=1; break; } else{ fname_in = get_next_token(tokenizer); // get next token new_std_in = open(fname_in, O_RDONLY); // open the file if(new_std_in == -1){ perror("stdin Redir Error"); } } } else if(*tok=='|'){ //if pipe token if(ispipe){ // cant have more than 1 pipe (didnt do the extra credit) printf("invalid multiple pipes\n"); error_flag=1; break; } else if(redirect_out_flag){ printf("invalid pipe after stdout redirection\n"); error_flag=1; break; } ispipe=1; //set a pipe flag } else if(*tok == '&'){ // if background command is_bg_flag = 1; } } if(is_bg_flag){ data[read_val-2]= '\0'; // delete the '&' from the data array } argv1size++; argv2size++; char *argv1[argv1size]; char *argv2[argv2size]; if(error_flag || not_exec_cmd){ //if not elecutable command (jobs, fg, bg...) or error flag (bad command) while( argv1_elt ){ argv1_elt = delete(argv1_elt, argv1_elt); //empty the linked list argv1_elt } while( argv2_elt ){ argv2_elt = delete(argv2_elt, argv2_elt); //empty the linked list argv2_elt } continue; } //set up argv1 array list_elt *cursor = argv1_elt; list_elt *last; int tempIndex=0; //last pointer to the last element of argv1_elt linked list while(cursor != NULL){ last = cursor; cursor = cursor->next; } //move all values from argv1_elt into argv1 array. 
while(last != NULL){ argv1[tempIndex]=last->item; last = last->prev; tempIndex++; } argv1[argv1size-1] = NULL; //set up argv2 array if there's a pipe if(ispipe){ cursor = argv2_elt; tempIndex=0; //last pointer to the last element of argv2_elt linked list while(cursor != NULL){ last = cursor; cursor = cursor->next; } //move all values from argv2_elt into argv2 array. while(last != NULL){ argv2[tempIndex]=last->item; //put input after the pipe into an array argv2 last = last->prev; tempIndex++; } argv2[argv2size-1] = NULL; } free_tokenizer( tokenizer ); // tokenizer update done. if((pid=fork()) < 0){ perror("fork1"); } //process & job stuff job* jb; jb = create_job(); // create corresponding job subjob* sj; sj = create_subjob(); // create corresponding subjob set_command(data, jb); if(pid==0){ if(setpgid(0,0)==-1){ perror("setpgid error"); } sj->pid = getpid(); // get subjobs pid jb->pgid = getpgid(pid);//pid=0 } else{ if(setpgid(pid, pid)==-1){ perror("setpgid error"); } // set correct job and subjob pid vals sj->pid = pid; jb->pgid = pid; } set_first_subjob(sj, jb); // link subjob to job add_new_job(jb, jlist); // add the job to the job_list jlist if(pid==0){ signal(SIGINT, SIG_DFL); signal(SIGTSTP, SIG_DFL); if(setpgid(0,0)==-1){ perror("setpgid error"); } if(new_std_out != 0){ // if redirect out command, dup accordingly if(dup2(new_std_out, STDOUT_FILENO) == -1){ //dup2 for > (out) perror("stdout dup2"); _exit(0); } free(fname_out); } if(new_std_in != 0){ // if redirect in command, dup accordingly if(dup2(new_std_in, STDIN_FILENO) == -1){ //dup2 for < (in) perror("stdin dup2"); _exit(0); } free(fname_in); } if(ispipe){ // if pipe command called int filedes[2]; if(pipe(filedes)){//pipe perror("pipe error"); } scnd_pid = fork(); if(scnd_pid < 0){ //print error if the fork failed perror("Fork"); exit(-1); } //create a subjob, set its pid for both child and parent subjob* sj2; sj2 = create_subjob(); if(scnd_pid==0){ sj2->pid = getpid(); } else{ sj2->pid = scnd_pid; } 
jb->pgid = getpgid(scnd_pid); //add to the job group as the first (most recent) process. set_first_subjob(sj2, jb); set_next_subjob(sj2, sj); pipe_pid=sj2->pid; if(scnd_pid==0){ // process that writes to pipe (program 1) grand child if(close(filedes[0]) == -1){ // close STDIN part of pipe perror("close"); } if(dup2(filedes[1], STDOUT_FILENO) == -1){ // dip for STDOUT perror("pipe dup2 #1"); } status = execvp(argv1[0], argv1); // execute if(status == -1){ perror("execvp program1"); exit(-1); } killpg(0, SIGKILL); _exit(0); // exit the child } else{ //program 2; (first fork) process that reads from pipe sigset_t sigmask; sigfillset(&sigmask); sigdelset(&sigmask, SIGCHLD); while(pipe_pid){ sigsuspend(&sigmask); } if(close(filedes[1]) == -1){ // close STDOUT part of pipe perror("close"); } if(dup2(filedes[0], STDIN_FILENO) == -1){ // dup for the STDIN perror("dup2 (pipe #2)"); } status = execvp(argv2[0], argv2); // execute the second part of pipe if(status == -1){ perror("execvp program2"); } killpg(0, SIGKILL); _exit(0); } } else{ // if not pipe, pid. status = execvp(argv1[0], argv1); if(status == -1){ perror("execvp"); } _exit(0); } } else{ //kinda-sh if(setpgid(pid, pid) ==-1){ perror("setpgid pid"); } if(is_bg_flag){ //if background command, prompt with running printf("Running: %s\n", data); } if(!is_bg_flag){ fg_pid= pid; // if not bacground command, set the fg_pid to be the first fork pid val tcsetpgrp(STDIN_FILENO, pid); } sigset_t sigmask0; sigfillset(&sigmask0); sigdelset(&sigmask0, SIGCHLD); //suspend only if it's not bg process while(fg_pid!=0){ sigsuspend(&sigmask0); } if(new_std_in != 0){ close(new_std_in); //close stdin if used } if(new_std_out != 0){ close(new_std_out); //close stdout if used } } while(argv1_elt){ argv1_elt = delete(argv1_elt, argv1_elt); //empty the linked list argv1_elt } while(argv2_elt){ argv2_elt = delete(argv2_elt, argv2_elt); //empty the linked list argv2_elt } } //if data[0]!=\n }//while end return 0; }
/** * Main program execution */ int main (int argc, char *argv[]) { TOKENIZER *tokenizer; char string[1024] = ""; char *tok; int br; int most_recent_job = 0; ProcessMap *jobs = new_map(); //Set up signal handling signal(SIGINT, SIG_IGN); signal(SIGTSTP, SIG_IGN); signal(SIGTTOU, SIG_IGN); signal(SIGTTIN, SIG_IGN); signal(SIGTERM, SIG_IGN); string[1023] = '\0'; /* ensure that string is always null-terminated */ printf("\nEnter a command or type ctrl-d to end session.\n" ); write(1, "\nmy-sh$ ", 8); //Input loop while ((br = read( STDIN_FILENO, string, 1023 )) > 0) { if (br <= 1) { write(1, "my-sh$ ", 8); continue; } string[br-1] = '\0'; tokenizer = init_tokenizer(string); //Create linked list of tokens LinkedList *input_command = new_list(256); while( (tok = get_next_token( tokenizer )) != NULL ) { push_back(input_command, tok); free(tok); } free_tokenizer(tokenizer); int executed = 0; int error = 0; //Checks for fg or bg if (get_length(input_command) == 1) { char *only_token = pop_back(input_command); if (compare_strings(only_token, "fg")) { if (move_to_foreground(jobs, &most_recent_job) == -1) error = 1; executed = 1; } else if (compare_strings(only_token, "bg")) { if (move_to_background(jobs, &most_recent_job) == -1) error = 1; executed = 1; } else { push_back(input_command, only_token); } free(only_token); } //Process input for pipes or background if an error has already been detected, go to the next command if (!executed && !error) { //Sees if a background ampersand is detected bool is_background = determine_background(input_command); LinkedList *full_command = copy_list(input_command); if (is_background) { printf("Running: "); print_list(input_command); } //Test for pipes bool is_pipe = false; LinkedList *first_command_list = new_list(50); LinkedList *second_command_list = new_list(50); int valid_pipe = find_piping(input_command, &is_pipe, first_command_list, second_command_list); //Command blocks are created from the command lists CommandBlock *first_command 
= new_command_block(first_command_list); CommandBlock *second_command = new_command_block(second_command_list); //Runs a function to check that there are no invalid redirections in the case of a piping if (is_pipe) { valid_pipe = valid_pipe && check_pipe(first_command, second_command); } //Notifies user of any incorrect pipe commands if (!is_pipe && !first_command->valid) { printf("Invalid command structure\n"); } else if (is_pipe && (!first_command->valid || !second_command->valid || !valid_pipe) ) { printf("Invalid command structure\n"); } //If it is a pipe and all necessary conditions are valid, then the piping occurs if (is_pipe && first_command->valid && second_command->valid && valid_pipe) { if (pipe_job (first_command, second_command, is_background, full_command, jobs, &most_recent_job) == -1) error = 1; } // No piping else if (!is_pipe && first_command->valid) { if (job (first_command, is_background, full_command, jobs, &most_recent_job) == -1) error = 1; } destroy_list(first_command_list); destroy_list(second_command_list); destroy_block(first_command); destroy_block(second_command); destroy_list(full_command); } destroy_list(input_command); monitor_background_jobs (jobs, &most_recent_job); if (error) perror("ERROR "); write(1, "my-sh$ ", 8); } destroy_map(jobs); printf( "\nSession ended\n" ); return 0; }