Beispiel #1
0
/*
 * Returns the next token of the string attached to `tokenizer`, or the result
 * of free_tokenizer() (presumably NULL -- confirm against its definition)
 * once the input is exhausted.
 *
 * The token is produced in place: the delimiter that ends it is overwritten
 * with '\0' and `next` is advanced past it. When `is_ignore_empties` is set,
 * runs of delimiters are collapsed and empty tokens are never returned.
 *
 * Returns NULL immediately if the tokenizer has no source string (`s`).
 */
const char* tokenize( tokenizer_t* tokenizer )
{
  /* Nothing attached: nothing to hand out. */
  if (tokenizer->s == NULL)
    return NULL;

  /* Loop form of the "skip an empty token and try again" step. */
  for (;;)
  {
    /* The previous call already returned the final token. */
    if (tokenizer->next == NULL)
      return free_tokenizer( tokenizer );

    /* The pending position becomes the candidate token. */
    tokenizer->current = tokenizer->next;
    tokenizer->next    = strpbrk( tokenizer->current, tokenizer->delimiters );

    if (tokenizer->next == NULL)
    {
      /* Final token: suppress it only if it is empty and empties are ignored. */
      if (tokenizer->is_ignore_empties && *tokenizer->current == '\0')
        return free_tokenizer( tokenizer );
      return tokenizer->current;
    }

    /* Cut the token at the delimiter and step over it. */
    tokenizer->next[0] = '\0';
    ++tokenizer->next;

    if (!tokenizer->is_ignore_empties)
      return tokenizer->current;

    /* Swallow any further adjacent delimiters. */
    tokenizer->next += strspn( tokenizer->next, tokenizer->delimiters );

    if (*tokenizer->current != '\0')
      return tokenizer->current;

    /* Empty token (the input started with a delimiter): go around again. */
  }
}
/*
 * Tears down the document loader by calling the shutdown routine of each
 * component it uses, in a fixed order: text categorizer, libxml2 parser,
 * tokenizer, stemmer and word normalizer.
 * NOTE(review): presumably the counterpart of an init routine that is not
 * visible here, and the order presumably mirrors it -- confirm before
 * reordering.
 */
void close_document_loader() {
  textcat_Done(TEXT_CAT_HANDLE);  /* hand the shared text categorizer handle back for disposal */
  xmlCleanupParser();             /* libxml2 global parser cleanup */
  free_tokenizer();
  close_stemmer();
  close_word_normalizer();
}
Beispiel #3
0
/**
 * Entry point of the TF-IDF generator.
 *
 * Usage: ./gen_TF_IDF [source_dir/ [destination_dir/]]
 * Both directories default to "." when omitted; more than two arguments is a
 * usage error (exit status 1).
 *
 * Initializes the tokenizer (pointed at the SENNA directory below
 * LLAMAPUN_ROOT_PATH) and the stemmer, walks source_dir with ftw() calling
 * process_file for each entry, and then shuts everything down again.
 *
 * @return 0 on success, 1 if the directory walk reported a failure.
 */
int main(int argc, char *argv[]) {
  char *source_directory, *destination_directory;
  switch (argc) {
    case 0:
    case 1:
      source_directory = destination_directory = ".";
      break;
    case 2:
      source_directory = argv[1];
      destination_directory = ".";
      break;
    case 3:
      source_directory = argv[1];
      destination_directory = argv[2];
      break;
    default:
      fprintf(stderr, "Too many arguments: %d\nUsage: ./gen_TF_IDF source_dir/ destination_dir/\n",argc);
      exit(1);
  }
  /* NOTE(review): the destination directory is parsed but nothing in this
     function consumes it -- confirm whether process_file should receive it. */
  (void)destination_directory;

  /* Build the SENNA path with a bounded write: the root path is a build-time
     setting, so make sure it actually fits instead of overflowing the buffer. */
  char senna_opt_path[FILENAME_BUFF_SIZE];
  int path_len = snprintf(senna_opt_path, sizeof senna_opt_path,
                          "%s/third-party/senna/", LLAMAPUN_ROOT_PATH);
  if (path_len < 0 || (size_t)path_len >= sizeof senna_opt_path) {
    fprintf(stderr, "SENNA path does not fit into %zu bytes\n", sizeof senna_opt_path);
    exit(1);
  }
  initialize_tokenizer(senna_opt_path);
  init_stemmer();

  /* ftw() returns non-zero when the walk fails or process_file stops it early;
     report that instead of silently claiming success. */
  int walk_status = ftw(source_directory, process_file, 1);
  if (walk_status != 0) {
    fprintf(stderr, "Traversal of %s did not complete (ftw returned %d)\n",
            source_directory, walk_status);
  }

  /* Shut down in the same order as before, regardless of the walk result. */
  close_stemmer();
  free_tokenizer();
  xmlCleanupParser();

  return walk_status == 0 ? 0 : 1;
}
Beispiel #4
0
void c_split_2(const char *in, const char *del, char **out1, char **out2) {
	*out1=NULL;
	*out2=NULL;
	const char *t;
	tokenizer_t tok = tokenizer( in, del, TOKENIZER_NO_EMPTIES );
	for ( t=tokenize(&tok); t; t=tokenize(&tok)) {
		if (*out1==NULL) { *out1=strdup(t); continue; }
		if (*out2==NULL) { *out2=strdup(t); continue; }
	}
	if (*out1==NULL) *out1=strdup("");
	if (*out2==NULL) *out2=strdup("");
	free_tokenizer( &tok );
}
/**
 * Entry point of the kinda-sh job-control shell.
 *
 * Installs the signal handlers, then loops forever: takes the terminal back
 * for the shell, prints queued job messages and the prompt, reads one command
 * line, splits it into up to two argument vectors (before / after a single
 * pipe), runs the built-ins jobs, fg and bg in-process, and otherwise forks
 * the command as a new job in its own process group.
 *
 * Relies on file-scope state that is not declared in this function: jlist,
 * message_q, sh_pgid, fg_pid, pipe_pid, pid and scnd_pid. fg_pid and pipe_pid
 * are only ever set to non-zero values here; the wait loops expect something
 * else (presumably child_signal_handler) to reset them to 0.
 *
 * argc and argv are unused. The loop has no normal exit; on end of input the
 * whole process group is killed with SIGKILL.
 */
int main(int argc, char *argv[]){
  jlist = create_job_list();

  char *tok;
  TOKENIZER *tokenizer;
  int ispipe, redirect_out_flag, redirect_in_flag, not_exec_cmd, is_bg_flag;

  //read-in buffer
  char data[BUFFER_MAX]; 
  char extra[1]; 

  //linked list to add argument arrays to execvp: 1 for process 1, 2 for piped process
  list_elt *argv2_elt = NULL; 
  list_elt *argv1_elt = NULL;
  message_q = NULL;
  int argv1size = 0;
  int argv2size = 0;
  int status;

  //set shell pgid
  sh_pgid = getpgid(0);

  //register sigaction for SIGCHLD, SIGINT, SIGTSTP
  struct sigaction handler_action;
  sigaction(SIGCHLD, NULL, &handler_action);  //save previous action for SIGCHLD to handler_action

  //change what handler_action does
  handler_action.sa_flags |= SA_SIGINFO;
  handler_action.sa_flags |= SA_RESTART;

  handler_action.sa_sigaction = child_signal_handler;
  sigfillset(&handler_action.sa_mask);

  // sigaction setup
  sigaction(SIGCHLD, &handler_action, NULL);
  sigaction(SIGTSTP, &handler_action, NULL);
  sigaction(SIGINT, &handler_action, NULL);

  // ignore SIGINT in the shell itself (this replaces the SIGINT handler installed just above)
  signal(SIGINT, SIG_IGN);
  //signal(SIGTSTP, SIG_IGN);

  while(1){
    signal(SIGTTOU, SIG_IGN); //ignore SIGTTOU while calling tcsetpgrp

    if (-1 == tcsetpgrp(STDIN_FILENO, sh_pgid)){
      perror("tcsetpgrp read error");
    }

    if(-1 == tcsetpgrp(STDOUT_FILENO, sh_pgid)){
      perror("tcsetpgrp read error 2");
    }

    signal(SIGTTOU, SIG_DFL); // stop ignoring SIGTTOU

    print_list(message_q); // print the message_q

    while(message_q){
      message_q =  delete(message_q, message_q); //empty the message_q
    }
    write(STDOUT_FILENO, "kinda-sh# ", 10); //prompt

    //initialize all the flags before tokenizing
    argv1size = 0;
    argv2size = 0;
    is_bg_flag = 0;
    ispipe = 0;
    redirect_out_flag = 0;
    redirect_in_flag = 0;  
    int new_std_in=0; // < (0 means "no redirection")
    int new_std_out = 0; // > (0 means "no redirection")
    char* fname_in = NULL;
    char* fname_out = NULL;
    int error_flag=0;
    not_exec_cmd = 0;
    fg_pid=0;

    int read_val;
    data[0] = '\0'; // null terminate data array

    read_val = read(0, data, BUFFER_MAX); // read into the data 

    //deal with a failed or empty read before read_val is used as an index
    if(read_val < 0){ 
      perror("Read"); //check for read error
      continue;       //nothing was read: data[read_val-1] would be out of bounds
    } 
    if(read_val == 0){
      kill(0, SIGKILL); //end of input: take down the whole process group
    }

    if(data[0] != '\n'){
      //check if there is buffer overflow
      if((read_val == BUFFER_MAX) && (data[BUFFER_MAX - 1] != '\n')){ 

        //discard the rest of the over-long line (similar to flushing stdin);
        //also stop on EOF or error so a closed stdin cannot spin here forever
        while(1){
          if(read(0, extra, 1) <= 0){
            break;
          }
          if(extra[0] == '\n'){
            break;
          }
        }
      }

      data[read_val-1] = '\0'; //null terminate data (overwrites the trailing newline)
      tokenizer = init_tokenizer(data); //create tokenizer on data

      // start tokenizer loop
      while((tok = get_next_token(tokenizer)) != NULL){

        if(is_bg_flag){
          printf("& should be at the end of the command \n");
          error_flag=1;
          break;
        }

        if( (*tok != '|') && (*tok != '&') && (*tok != '<') &&(*tok !=  '>') && (*tok != '\0')){ //check for token

          if(!ispipe){

            if(redirect_out_flag || redirect_in_flag){
              printf("invalid argument between redirection and pipe\n"); //cat < infile something | wc
              error_flag=1;
              break;
            }
            argv1_elt = add(argv1_elt, tok); // add tok to argv1
            argv1size++;
          }
          else{ // after pipe - second process
            argv2size++;
            argv2_elt = add(argv2_elt, tok);
          }
          //if jobs command called
          if(str_equals(tok, "jobs")){
            print_job_list(jlist);
            not_exec_cmd = 1;
          }

          // if fg command called
          if(str_equals(tok, "fg")){
            job* new_bg_jb;
            char* num;
            num = get_next_token(tokenizer);
            int num_int = -1;
            not_exec_cmd = 1;
            int list_len;
            //if number argument is not specified, take the most recent background job
            if(num == NULL){
              new_bg_jb = get_ith_job(jlist, 1);
              if(new_bg_jb == NULL){
                printf("fg error: no job in job queue\n");
                break;
              }
            }
            //take the num'th background job
            else{
              num_int= my_atoi(num); // run atoi on input number (ie: fg 2)
              //reverse the number into the correct job order
              list_len = listlength(jlist);
              num_int = num_int - list_len -1;

              if(num_int < 0){
                num_int = num_int * (-1);
              }
              new_bg_jb = get_ith_job(jlist, num_int); //get num_int job from the job list

              if(new_bg_jb == NULL){
                printf("fg error: no [%s] job\n", num);
                free(num);
                break;
              } 
            }
            //take the foreground job pid as the chosen bg pid
            fg_pid= new_bg_jb->pgid;
            message_q = add(message_q, "\n"); // add restarting prompt to message_q
            message_q = add(message_q, new_bg_jb->command);
            message_q = add(message_q, "Restarting: ");

            print_list(message_q);
            while(message_q){
              message_q =  delete(message_q, message_q); //empty the message_q
            }

            if(tcsetpgrp(STDIN_FILENO, fg_pid) == -1){
              perror("tcsetpgrp error");
            }

            //relay SIGCONT to the job, and mask it from all signals but SIGCHLD
            killpg(fg_pid, SIGCONT);

            sigset_t cont_mask;
            sigfillset(&cont_mask);
            sigdelset(&cont_mask, SIGCHLD);

            //now the shell should wait for the new foregrounded job to be finished
            while(fg_pid){
              sigsuspend(&cont_mask);
            }
          }
          //take the specific background job to restart. If already running, do nothing.
          if(str_equals(tok, "bg")){
            job* new_bg_jb;
            not_exec_cmd = 1;
            char* num;
            int num_int=-1;
            num = get_next_token(tokenizer);

            // if num is not specified, get the most recent background job (to restart it)
            if(num == NULL){
              new_bg_jb = get_ith_job(jlist, 1);
              if(new_bg_jb == NULL){
                printf("bg error: no job in job queue\n");
                break;
              }
            }
            else{
              num_int= my_atoi(num); // run atoi for bg 
              //set up num_int to pass
              int list_len;
              list_len = listlength(jlist);
              num_int = num_int - list_len -1;
              if(num_int < 0){
                num_int = num_int * (-1);
              }
              new_bg_jb = get_ith_job(jlist, num_int); // get ith job from job_list

              if(new_bg_jb == NULL){
                printf("bg error: no [%s] job\n", num);
                free(num);
                break;
              }
            }

            if(!job_stopped(new_bg_jb)){
              //   tcsetpgrp(STDIN_FILENO, 0);
              printf("bg error: job is already running in the background\n");
              break;
            }

            killpg(new_bg_jb->pgid, SIGCONT);
            tcsetpgrp(STDIN_FILENO, 0);
          }
        }
        else if(*tok=='>'){ // if redirect out token

          if(redirect_out_flag){
            error_flag=1;
            printf("multiple stdout redirection is invalid\n"); //printf?
            break;
          }
          else{
            fname_out = get_next_token(tokenizer); // get the next token
            if(fname_out == NULL){ // '>' was the last token: no file to open
              printf("missing file name for stdout redirection\n");
              error_flag=1;
              break;
            }
            new_std_out = open(fname_out, O_WRONLY| O_TRUNC | O_CREAT, 0644); // open file
            if(new_std_out == -1){
              perror("stdout Redir Error");
              new_std_out = 0; // nothing to close later
              error_flag=1;    // the command cannot run without its output file
              break;
            }
            redirect_out_flag=1;
          }
        }
        else if(*tok=='<'){ //if redirect in token

          if(ispipe){
            printf("invalid stdin redirection after pipe\n");
            error_flag=1;
            break;
          }
          else if(redirect_in_flag){
            printf("multiple stdin redirection is invalid\n");
            error_flag=1;
            break;
          }            
          else{
            fname_in = get_next_token(tokenizer); // get next token
            if(fname_in == NULL){ // '<' was the last token: no file to open
              printf("missing file name for stdin redirection\n");
              error_flag=1;
              break;
            }
            new_std_in = open(fname_in, O_RDONLY); // open the file 
            if(new_std_in == -1){
              perror("stdin Redir Error");
              new_std_in = 0; // nothing to close later
              error_flag=1;   // the command cannot run without its input file
              break;
            }
            redirect_in_flag=1; // remember it, so the duplicate/ordering checks above can fire
          }
        }
        else if(*tok=='|'){ //if pipe token

          if(ispipe){ // cant have more than 1 pipe (didnt do the extra credit)
            printf("invalid multiple pipes\n");
            error_flag=1;
            break;
          }
          else if(redirect_out_flag){
            printf("invalid pipe after stdout redirection\n");
            error_flag=1;
            break;
          }
          ispipe=1; //set a pipe flag
        }
        else if(*tok == '&'){ // if background command
          is_bg_flag = 1;
        }
      }

      if(is_bg_flag){
        data[read_val-2]= '\0'; // delete the '&' from the data array
      }

      //a program name is required on each side of the pipe; without one there
      //is nothing to exec (and the argv arrays below would start with NULL)
      if(!error_flag && !not_exec_cmd && (argv1size == 0 || (ispipe && argv2size == 0))){
        if(ispipe || redirect_in_flag || redirect_out_flag){
          printf("invalid command: missing program name\n");
        }
        error_flag = 1;
      }

      argv1size++;
      argv2size++;

      char *argv1[argv1size];
      char *argv2[argv2size];

      if(error_flag || not_exec_cmd){ //if not executable command (jobs, fg, bg...) or error flag (bad command)
        while( argv1_elt ){
          argv1_elt = delete(argv1_elt, argv1_elt); //empty the linked list argv1_elt
        }
        while( argv2_elt ){
          argv2_elt = delete(argv2_elt, argv2_elt); //empty the linked list argv2_elt
        }
        free_tokenizer( tokenizer ); //this path used to leak the tokenizer
        if(new_std_in > 0){
          close(new_std_in); //drop a redirection file opened before the error
        }
        if(new_std_out > 0){
          close(new_std_out);
        }
        continue;
      }

      //set up argv1 array
      list_elt *cursor = argv1_elt;
      list_elt *last = NULL;
      int tempIndex=0;
      //last pointer to the last element of argv1_elt linked list
      while(cursor != NULL){
        last = cursor;
        cursor = cursor->next;
      }
      //move all values from argv1_elt into argv1 array.
      while(last != NULL){ 
        argv1[tempIndex]=last->item;
        last = last->prev;
        tempIndex++;
      }
      argv1[argv1size-1] = NULL;

      //set up argv2 array if there's a pipe
      if(ispipe){
        cursor = argv2_elt;
        tempIndex=0;

        //last pointer to the last element of argv2_elt linked list
        while(cursor != NULL){
          last = cursor;
          cursor = cursor->next;
        }
        //move all values from argv2_elt into argv2 array.
        while(last != NULL){ 
          argv2[tempIndex]=last->item; //put input after the pipe into an array argv2
          last = last->prev;
          tempIndex++;
        }
        argv2[argv2size-1] = NULL;
      }
      free_tokenizer( tokenizer ); 

      // tokenizer update done.

      if((pid=fork()) < 0){
        perror("fork1");
        //no child exists: do not register a job or wait for one, just clean up
        while(argv1_elt){
          argv1_elt = delete(argv1_elt, argv1_elt);
        }
        while(argv2_elt){
          argv2_elt = delete(argv2_elt, argv2_elt);
        }
        if(new_std_in != 0){
          close(new_std_in);
        }
        if(new_std_out != 0){
          close(new_std_out);
        }
        continue;
      }

      //process & job stuff (runs in both the shell and the child)
      job* jb;
      jb = create_job(); // create corresponding job
      subjob* sj;
      sj = create_subjob(); // create corresponding subjob
      set_command(data, jb);

      if(pid==0){

        if(setpgid(0,0)==-1){
          perror("setpgid error");
        }
        sj->pid = getpid(); // get subjobs pid
        jb->pgid = getpgid(pid);//pid=0
      }
      else{

        if(setpgid(pid, pid)==-1){
          perror("setpgid error");
        }
        // set correct job and subjob pid vals
        sj->pid = pid;
        jb->pgid = pid;
      }

      set_first_subjob(sj, jb); // link subjob to job
      add_new_job(jb, jlist); // add the job to the job_list jlist

      if(pid==0){
        signal(SIGINT, SIG_DFL);
        signal(SIGTSTP, SIG_DFL);

        if(setpgid(0,0)==-1){
          perror("setpgid error");
        }

        if(new_std_out != 0){
          // if redirect out command, dup accordingly
          if(dup2(new_std_out, STDOUT_FILENO) == -1){ //dup2 for > (out)
            perror("stdout dup2");
            _exit(0);
          }
          free(fname_out);
        }

        if(new_std_in != 0){
          // if redirect in command, dup accordingly
          if(dup2(new_std_in, STDIN_FILENO) == -1){ //dup2 for < (in)
            perror("stdin dup2");
            _exit(0);
          }
          free(fname_in);
        }

        if(ispipe){ // if pipe command called
          int filedes[2];

          if(pipe(filedes)){//pipe
            perror("pipe error");
          }
          scnd_pid = fork();

          if(scnd_pid < 0){ //print error if the fork failed
            perror("Fork");
            exit(-1);
          }
          //create a subjob, set its pid for both child and parent
          subjob* sj2;
          sj2 = create_subjob();

          if(scnd_pid==0){
            sj2->pid = getpid(); 
          }
          else{
            sj2->pid = scnd_pid;
          }

          jb->pgid = getpgid(scnd_pid);

          //add to the job group as the first (most recent) process.
          set_first_subjob(sj2, jb);
          set_next_subjob(sj2, sj);
          pipe_pid=sj2->pid;

          if(scnd_pid==0){ // process that writes to pipe (program 1) grand child

            if(close(filedes[0]) == -1){ // close the read end of the pipe
              perror("close");
            }

            if(dup2(filedes[1], STDOUT_FILENO) == -1){ // dup for STDOUT
              perror("pipe dup2 #1");
            }
            status =  execvp(argv1[0], argv1); // execute

            if(status == -1){  
              perror("execvp program1");
              exit(-1);
            }
            killpg(0, SIGKILL);
            _exit(0); // exit the child
          }
          else{ //program 2; (first fork) process that reads from pipe
            sigset_t sigmask;
            sigfillset(&sigmask);
            sigdelset(&sigmask, SIGCHLD);

            //wait until pipe_pid has been cleared (done outside this function)
            while(pipe_pid){
              sigsuspend(&sigmask);
            }

            if(close(filedes[1]) == -1){ // close the write end of the pipe
              perror("close");
            }

            if(dup2(filedes[0], STDIN_FILENO) == -1){ // dup for the STDIN
              perror("dup2 (pipe #2)");
            }

            status = execvp(argv2[0], argv2); // execute the second part of pipe

            if(status == -1){  
              perror("execvp program2");
            }
            killpg(0, SIGKILL);
            _exit(0);
          }
        }
        else{ // if not pipe, pid.
          status = execvp(argv1[0], argv1); 

          if(status == -1){  
            perror("execvp");
          }
          _exit(0);
        }
      } 
      else{ //kinda-sh
        if(setpgid(pid, pid) ==-1){
          perror("setpgid pid");
        }

        if(is_bg_flag){ //if background command, prompt with running
          printf("Running: %s\n", data);
        }

        if(!is_bg_flag){
          fg_pid= pid; // if not background command, set the fg_pid to be the first fork pid val
          tcsetpgrp(STDIN_FILENO, pid);
        }

        sigset_t sigmask0;
        sigfillset(&sigmask0);
        sigdelset(&sigmask0, SIGCHLD);

        //suspend only if it's not bg process  
        while(fg_pid!=0){
          sigsuspend(&sigmask0);
        }
        if(new_std_in != 0){
          close(new_std_in); //close stdin if used
        }
        if(new_std_out != 0){
          close(new_std_out); //close stdout if used
        }

      }
      while(argv1_elt){
        argv1_elt = delete(argv1_elt, argv1_elt); //empty the linked list argv1_elt
      }
      while(argv2_elt){
        argv2_elt = delete(argv2_elt, argv2_elt); //empty the linked list argv2_elt
      }
    } //if data[0]!=\n
  }//while end

  return 0;
}
Beispiel #6
0
/**
* Main program execution
*/
int main (int argc, char *argv[]) {
	TOKENIZER *tokenizer;
	char string[1024] = "";
	char *tok;
	int br;

	int most_recent_job = 0;

	ProcessMap *jobs = new_map();

	//Set up signal handling
	signal(SIGINT, SIG_IGN);
	signal(SIGTSTP, SIG_IGN);
	signal(SIGTTOU, SIG_IGN);
	signal(SIGTTIN, SIG_IGN);
	signal(SIGTERM, SIG_IGN);


	string[1023] = '\0';	   /* ensure that string is always null-terminated */
	printf("\nEnter a command or type ctrl-d to end session.\n" );
	write(1, "\nmy-sh$ ", 8);

	//Input loop
	while ((br = read( STDIN_FILENO, string, 1023 )) > 0) {

		if (br <= 1) {
			write(1, "my-sh$ ", 8);
			continue;
		}

		string[br-1] = '\0';
		tokenizer = init_tokenizer(string);

		//Create linked list of tokens
		LinkedList *input_command = new_list(256);
		while( (tok = get_next_token( tokenizer )) != NULL ) {
			push_back(input_command, tok);
			free(tok);
		}
		free_tokenizer(tokenizer);


		int executed = 0;
		int error = 0;
		//Checks for fg or bg
		if (get_length(input_command) == 1) {
			char *only_token = pop_back(input_command);

			if (compare_strings(only_token, "fg")) {
				if (move_to_foreground(jobs, &most_recent_job) == -1)
					error = 1;
				executed = 1;
					
			} else if (compare_strings(only_token, "bg")) {
				if (move_to_background(jobs, &most_recent_job) == -1)
					error = 1;
				executed = 1;
			} else {
				push_back(input_command, only_token);
			}
			free(only_token);
		}


		//Process input for pipes or background if an error has already been detected, go to the next command
		if (!executed && !error) {

			//Sees if a background ampersand is detected
			bool is_background = determine_background(input_command);

			LinkedList *full_command = copy_list(input_command);

			if (is_background) {
				printf("Running: ");
				print_list(input_command);
			}

			//Test for pipes
			bool is_pipe = false;
			LinkedList *first_command_list = new_list(50);
			LinkedList *second_command_list = new_list(50);
			int valid_pipe = find_piping(input_command, &is_pipe, first_command_list, second_command_list);

			//Command blocks are created from the command lists
			CommandBlock *first_command = new_command_block(first_command_list);
			CommandBlock *second_command = new_command_block(second_command_list);

			//Runs a function to check that there are no invalid redirections in the case of a piping
			if (is_pipe) {
				valid_pipe = valid_pipe && check_pipe(first_command, second_command);
			}

			//Notifies user of any incorrect pipe commands
			if (!is_pipe && !first_command->valid) {
				printf("Invalid command structure\n");
			} else if (is_pipe && (!first_command->valid || !second_command->valid || !valid_pipe) ) {
				printf("Invalid command structure\n");
			}


			//If it is a pipe and all necessary conditions are valid, then the piping occurs
			if (is_pipe && first_command->valid && second_command->valid && valid_pipe) {
				
				if (pipe_job (first_command, second_command, is_background, full_command, jobs, &most_recent_job) 
					== -1)
					error = 1;

			} 
			// No piping
			else if (!is_pipe && first_command->valid) {

				if (job (first_command, is_background, full_command, jobs, &most_recent_job) == -1)
					error = 1;
			}

			destroy_list(first_command_list);
			destroy_list(second_command_list);
			destroy_block(first_command);
			destroy_block(second_command);
			destroy_list(full_command);
		}

		destroy_list(input_command);

		
		monitor_background_jobs (jobs, &most_recent_job);

		if (error)
			perror("ERROR ");

		write(1, "my-sh$ ", 8);
	}

	destroy_map(jobs);

	
	
	printf( "\nSession ended\n" );
	return 0;
}