Пример #1
0
/**
 * @brief A wrapper routine for get_next_token(). Keeps reading tokens from the
 * 		 query until one passes is_valid_token(), then post-processes it:
 * 		 - a token starting with a backtick is passed through sanitize_token();
 * 		 - CONCAT followed by everything up to the next ")" is skipped and
 * 		   the next valid token after it is returned instead;
 * 		 - MAX(<name>) is reduced to <name>.
 * @param query The query from where next token will be read.
 * @param index The index in the passed query from where next token will be
 * 			scanned. It is advanced by the underlying get_next_token() calls.
 * @return The token that we have just read, or "" when the query ended before
 * 			a valid token was found, when a CONCAT block is never closed, or
 * 			when MAX is not followed by "(", a name and ")".
 */
std::string get_next_valid_token(std::string *query, unsigned int* index) {
	std::string current_token = get_next_token(query, index);
	std::string tmp = "";
	// skip tokens until a valid one shows up or the query is exhausted
	while (!is_valid_token(current_token) and *index < query->length()) {
		current_token = get_next_token(query, index);
	}
	// how did we get out ? was it because we got a valid token or because the
	// end of the query was reached ?
	if (*index >= query->length() and !is_valid_token(current_token)) {
		return "";
	}
	// tokens enclosed in backticks are cleaned up by sanitize_token()
	if (current_token[0] == '`') {
		sanitize_token(&current_token);
	}
	// if token is CONCAT then eat out everything until a ')' is found
	if (convert_to_uppercase(current_token) == "CONCAT") {
		current_token = get_next_token(query, index);
		while (current_token != ")" and *index < query->length()) {
			current_token = get_next_token(query, index);
		}
		if (current_token != ")") {
			// the query ended before the CONCAT block was closed, so there
			// is no valid token left to hand back
			return "";
		}
		/*
		 * current_token is ")" here, so only the CONCAT block has been eaten.
		 * The caller still needs a valid token, and the one following the
		 * block may itself need the treatment above, hence the recursion.
		 */
		current_token = get_next_valid_token(query, index);
	} else if (convert_to_uppercase(current_token) == "MAX") {
		/*
		 * MAX(column_name) is a keyword that gives a column name in round
		 * brackets.
		 */
		tmp = get_next_token(query, index); //must be a (
		if (tmp != "(") {
			std::cerr << "No '(' after MAX at pos: " << *index << std::endl;
			return "";
		}
		//now read the actual col name
		current_token = get_next_token(query, index);
		//bypass the closing ')'
		tmp = get_next_token(query, index); //must be a )
		if (tmp != ")") {
			std::cerr << "No ')' after MAX at pos: " << *index << std::endl;
			return "";
		}
	}
	return current_token;
}
Пример #2
0
/**
 * @brief Tell whether a token is one of the reserved keywords listed in this
 * 			function. The token is upper-cased before the comparison, so
 * 			the check does not depend on the case used in the query.
 * @param token Token which is to be checked
 * @return true when the upper-cased token equals one of the listed keywords,
 * 			false otherwise
 */
bool is_token_reserved(std::string token) {
	// the table below is upper-case, so normalise the candidate the same way
	const std::string candidate = convert_to_uppercase(token);

	static const char * const reserved_words[] = {
			"SELECT", "FROM", "WHERE", "GROUP", "BY", "HAVING", "AND", "OR",
			"NOT", "INNER", "OUTER", "ON", "JOIN", "ORDER", "LIMIT", "ASC",
			"DESC", "ALL", "LEFT", "RIGHT", "UNION", "LIKE", "MAX", "IN", "IS",
			"NULL", "NOW" };
	// derived from the table so the two can never disagree
	const unsigned int reserved_word_count = sizeof(reserved_words)
			/ sizeof(reserved_words[0]);

	// walk the table until a match is found or the table is exhausted
	unsigned int pos = 0;
	while (pos < reserved_word_count and candidate != reserved_words[pos]) {
		++pos;
	}
	return pos < reserved_word_count;
}
Пример #3
0
/**
 * @brief Updates the parser state according to the token just scanned and
 * 		reports whether that token caused a state change.
 * 		- SELECT / UNION: both states become SELECT, the global
 * 		  state_reset_needed flag is raised and true is always returned.
 * 		- FROM / JOIN: previous state takes the current one, current
 * 		  becomes FROM.
 * 		- WHERE / ON / BY: previous state takes the current one, current
 * 		  becomes WHERE.
 * 		- any other token leaves both states untouched.
 * 		The token is compared in upper case.
 * @param token Token which will be examined.
 * @param current_state The current state of the program
 * @param previous_state The previous state of the program.
 * @return true for SELECT/UNION; otherwise true only when the current state
 * 			ended up different from what it was on entry.
 */
bool set_state(std::string &token, token_state_t *current_state,
		token_state_t *previous_state) {
	const std::string keyword = convert_to_uppercase(token);

	/*
	 * SELECT starts a fresh statement, and UNION will anyway be followed by a
	 * SELECT, so both are handled the same way: reset both states.
	 */
	if (keyword == "SELECT" or keyword == "UNION") {
		*previous_state = *current_state = SELECT;
		state_reset_needed = true;
		return true;
	}

	// remembered so that we can tell afterwards whether anything changed
	const int state_on_entry = *current_state;

	/*
	 * FROM and JOIN list table names:
	 * select ss.secondaryKeyword FROM site s  INNER JOIN site_state st
	 */
	const bool starts_table_list = (keyword == "FROM" or keyword == "JOIN");
	/*
	 * ON | WHERE | BY give column names, usually in composite manner; BY is
	 * part of the 'ORDER BY' clause:
	 * ..JOIN site_seo ss ON ss.siteId = s.siteId LEFT JOIN site_noalert na ON na.site = s.siteId
	 */
	const bool starts_column_list = (keyword == "WHERE" or keyword == "ON"
			or keyword == "BY");

	if (starts_table_list) {
		*previous_state = *current_state;
		*current_state = FROM;
	} else if (starts_column_list) {
		*previous_state = *current_state;
		*current_state = WHERE;
	}

	return *current_state != state_on_entry;
}
Пример #4
0
/**
* @brief
*	Function to open a specific file mentioned in the path
*	based on the incoming modes. The path is walked one component at a
*	time, starting from the root directory; each component is looked up
*	with get_dir_entry() either as a long file name or as a space padded,
*	upper-cased short name. When the last component is not found and a
*	create/write flag is given, the file is created.
* @param file_path
*	Pointer to the location of the file path string
* @param flags
*	Flags indicating the modes in which the file is to be opened
*
* @return
*	On success the value returned by retrieve_file_info() (the file
*	descriptor of the opened file).
*	-1 when write access is requested on an entry carrying ATTR_READ, or
*	when the file does not exist and none of FILE_CREATE_NEW,
*	FILE_CREATE_ALWAYS, FILE_WRITE is set.
*/
int file_open(const char *file_path,int flags)
{
	const char *path = file_path;
	const char *temp_path;
	/* start of the component being parsed; initialised so that a path
	   without a leading delimiter never reads an indeterminate pointer */
	const char *delim_strt = file_path;
	char shrt_file_name[SHRT_FILE_NAME_LEN];
	char long_file_name[LONG_FILE_NAME_LEN];
	int len = 0,fl_des = 0,crt_flag,i;
	int delim_cnt = 0;
	int extn_len_cnt = 0;
	int seq_num = 1;
	/* stays false when find_depth() reports no component at all */
	bool is_file_found = false;
	dir_entry *entry = NULL;
	file_info *info;
	u8 *pwd = root_directory;
	u32 strt_cluster = rt_dir_strt_clus;
	bool is_long_file_name = false;

	sw_memset(long_file_name,SPACE_VAL,LONG_FILE_NAME_LEN);
	delim_cnt = find_depth(file_path);

	for(i=0;i<delim_cnt;i++){
		if(*path == DELIMITER){
			delim_strt = path;
			path++;
		}
		/* copy the base name (up to '.', delimiter or end of string) */
		while((*path != EXTN_DELIMITER) && (*path != '\0')
			&& (*path != DELIMITER) && (len < LONG_FILE_NAME_LEN)){
			long_file_name[len] = *path;
			path++;
			len++;
		}
		/* measure the extension without consuming it */
		temp_path = path;
		if(*temp_path == EXTN_DELIMITER){
			temp_path++;
			while(*temp_path != DELIMITER && *temp_path != '\0'){
				extn_len_cnt++;
				temp_path++;
			}
		}
		if(len > FILE_NAME_SHRT_LEN || extn_len_cnt > FILE_NAME_EXTN_LEN)
			is_long_file_name = true;

		if(is_long_file_name){
			/* re-read the whole component, extension included, as is */
			path = delim_strt;
			len = 0;
			if(*path == DELIMITER)
				path++;
			/* leave room for the terminating '\0' written below */
			while(len < LONG_FILE_NAME_LEN - 1 && *path != '\0'
			      && *path != DELIMITER){
				long_file_name[len] = *path;
				path++;
				len++;
			}
			long_file_name[len] = '\0';
			if(entry){
				sw_free(entry);
				entry = NULL;
			}
			is_file_found = get_dir_entry(long_file_name,&entry,
										  pwd,strt_cluster,true);
		}
		else{
			/* short name: the extension is placed right after the space
			   padded base name, the '.' itself is not stored */
			/* NOTE(review): a component ending in '.' makes the loop below
			   step past the terminating '\0' - confirm callers never pass
			   such a path */
			len = FILE_NAME_SHRT_LEN;
			while(len < SHRT_FILE_NAME_LEN  && *path != '\0'
			      && *path != DELIMITER){
				if(*path == EXTN_DELIMITER)
					path++;
				long_file_name[len] = *path;
				path++;
				len++;
			}
			convert_to_uppercase(long_file_name);
			if(entry){
				sw_free(entry);
				entry = NULL;
			}
			is_file_found = get_dir_entry(long_file_name,&entry,
										  pwd,strt_cluster,false);
		}
		/* an intermediate component was found: descend into it and reset
		   the per-component parsing state */
		if(is_file_found && (i != delim_cnt - 1)){
			strt_cluster = (entry->strt_clus_hword)<<16 |
				       (entry->strt_clus_lword);
			pwd = cluster_to_memory_addr(strt_cluster);
			len = 0;
			extn_len_cnt = 0;
			sw_memset(shrt_file_name,SPACE_VAL,SHRT_FILE_NAME_LEN);
			sw_memset(long_file_name,SPACE_VAL,LONG_FILE_NAME_LEN);
			is_long_file_name = false;
		}
	}
	if(is_file_found){
		if(flags & FILE_WRITE){
			/* chk_file_lock() returning -1 downgrades the request to
			   read-only */
			if(chk_file_lock(file_path) == -1)
				flags = FILE_READ;
			if(entry->attr & ATTR_READ){
				sw_printf("Cannot open the file in write mode\n");
				/* the entry is not handed to anyone on this path */
				sw_free(entry);
				return -1;
			}
		}
		info = (file_info*)sw_malloc(sizeof(file_info));
		fl_des = retrieve_file_info(info,entry,flags,
									dir_file_offset,file_path);
	}
	else{
		if((flags & FILE_CREATE_NEW) || (flags & FILE_CREATE_ALWAYS)
		   || (flags & FILE_WRITE)){
			if(is_long_file_name){
				/* derive a short alias and bump the sequence number until
				   the alias is not already present in this directory */
				get_short_file_name(long_file_name,shrt_file_name,
									(char)seq_num);
				if(get_dir_entry(shrt_file_name,NULL,
								 pwd,strt_cluster,false) == true){
					while(get_dir_entry(shrt_file_name,NULL,
										pwd,strt_cluster,false)){
						seq_num++;
						get_short_file_name(long_file_name,
											shrt_file_name,(char)seq_num);
					}
				}
				convert_to_uppercase(shrt_file_name);
				crt_flag = create_file(long_file_name,
									   shrt_file_name,strt_cluster,&entry);
			}
			else
				crt_flag = create_file(NULL,long_file_name,strt_cluster,&entry);
			if(crt_flag == 0)
				sw_printf("File creation success\n");
			info = (file_info*)sw_malloc(sizeof(file_info));
			fl_des = retrieve_file_info(info,entry,flags,
										dir_file_offset,file_path);
		}
		else
			return -1;
	}
	return fl_des;
}
Пример #5
0
/**
 * @brief The main routine which accepts a SQL query and returns a list of type
 * 			TblColList which will contain list of all table and column names
 * 			referenced in the given query.
 * @param queryStr The query which is to be looked into.
 * @return A list of results.
 */
struct TblColList* ProcessQuery(std::string queryStr) {
	std::string current_token, previous_token, next_token;
	unsigned int index = 0;
	std::list<std::string> table_name_list; //store list of tables in current state
	// a SELECT/UNION will reset it.
	token_state_t current_state = NONE, previous_state = NONE;
	std::string table_name, col_name;

	std::list<lookup_table_for_name_alias_t> lookup_table_list;
	lookup_table_for_name_alias_t lookup_element;

	/*
	 * we will use stack where we will save the table_name_list the moment
	 * we encounter a opening round bracket. We dont need to save pRes as
	 * this stores relationship already established between column name and tables.
	 * This is in a way immutable once we the values have been stored. values
	 * such as current_token, next_token and index are all either changing and thus
	 * have state for that iteration only or their linear growth is valid even
	 * in a subquery (for index).
	 *
	 */
	struct query_state_t query_state;
	std::stack<struct query_state_t> query_state_stack;

	struct TblColList *pRes = new TblColList;

	while (index < queryStr.length()) {
		col_name = "";
		current_token = "";
		current_token = get_next_valid_token(&queryStr, &index);

		//have we reached end of stream
		if (current_token == "") {
			//no matter what we must end processing. How could we get an empty token ?
			return pRes;
		}

		if (current_token == "(") {
			/*
			 * when a opening '(' is encountered in the stream, it will not
			 * necessarily mean beginning of a sub-query. It can involve expressions
			 * like:
			 * ..from coupon c where c.id=5 and (c.roll>9)
			 * If we do a state save the moment we encounter "(" then alias c cant be
			 * looked up therefore we will do a state save only when we encounter
			 * SELECT after '(' .
			 *
			 * Also when we encounter a '(' NOT followed by a select then we will
			 * do the state save but we will not reset the state. So we will push
			 * current state the moment we find '(' but we will reset the state only
			 * when the next token is SELECT.
			 *
			 * Another option could have been that we save state only when '(' is
			 * followed by SELECT. But when we encounter the closing bracket ')'
			 * what do we pop off the stack ?
			 * (Well it could be simple pop if u can otherwise ignore. It would only
			 * mean that the opening ( for this closing ) did not mark a subquery.)
			 * Not a good option since for mal-formed query if there are misplaced
			 * closing brackets then wrong state will get popped off at wrong time .
			 * (optimal but does not work)
			 */

			next_token = "";
			next_token = get_next_valid_token(&queryStr, &index);

			//create an empty query_state variable
			query_state.current_state = NONE;
			query_state.previous_state = NONE;
			query_state.table_name_list.clear();
			query_state.select_triggered_query_state_change = false;

			//save the state
			query_state.current_state = current_state;
			query_state.previous_state = previous_state;
			query_state.table_name_list = table_name_list;

			//state save only when SELECT is the next token
			if (convert_to_uppercase(next_token) == "SELECT") {
				query_state.select_triggered_query_state_change = true;
				query_state_stack.push(query_state);

				//also  reset the state
				table_name_list.clear();
				current_state = NONE;
				previous_state = NONE;
			}
			//also push back this token
			pushback_token_to_stream(&next_token, &index);
			continue;
		}
		if (current_token == ")") {
			//now time to pop back what we stored in stack
			if (query_state_stack.empty()) {
				continue;
			}

			if (query_state_stack.top().select_triggered_query_state_change
					== true) {
				/*
				 * if the state saved at stack was triggered by SELECT then only
				 * do a state save and pop
				 * pop and save state into current variables
				 */
				table_name_list = (query_state_stack.top().table_name_list);
				current_state = (query_state_stack.top()).current_state;
				previous_state = (query_state_stack.top()).previous_state;

				//pop the top
				query_state_stack.pop();

			}
			continue;
		}
		// see if this token triggers a state change
		if (set_state(current_token, &current_state, &previous_state) == true) {
			continue;
		}

		/*
		 * if a state reset needed because keyword SELECT/UNION has been
		 * encountered in the input stream, then perform a state reset. After
		 * resetting table_name_list toggle state_reset_needed flag.
		 */
		if (state_reset_needed) {
			table_name_list.clear();
			toggle_state_reset();
		}
		/*
		 * when being in a state, if a reserved token is encountered and
		 * if code at that position can not handle it then it must do
		 *  a pushback followed a continue. We will handle reserved keywords
		 *  or unhandled tokens here.
		 *  Right now we are not handling most of the reserved tokens or operators
		 *  in this block. so continue
		 */
		if (is_token_reserved(current_token)
				or is_token_operator(current_token)) {
			// deal with token
			continue;
		}

		if (current_token == "," or current_token == ";") {
			continue;
		}

		//process state SELECT
		if (current_state == SELECT) {

		}
		// process state FROM
		if (current_state == FROM) {
			/*
			 * we can look for columns in FROM state
			 * 1. .. select name,roll from table_t1,table_t2
			 * 		// table_names separated by comma
			 * 2. .. select name,roll from table1 where roll>9
			 * 		// table names followed by reserved keywords
			 * 3. .. select a,b,c from table1 as t1,table2 as t2
			 * 		// table_name with alias_name separated by 'AS'
			 * 4. .. select t1.name,t2.roll from table1 t1,table2 t2
			 * 		// table_name with alias separated by space
			 */
			lookup_element.alias_name = "";
			lookup_element.table_name = "";

			if (!is_valid_tblcol_name(current_token)) {
				pushback_token_to_stream(&current_token, &index);
				continue;
			}

			next_token = "";
			next_token = get_next_valid_token(&queryStr, &index);
			/*
			 * Next token can be 'AS' or an alias name. For all other values
			 * of next_tokens, it must be pushed back to stream
			 */
			if ((is_valid_tblcol_name(next_token) == false)) {
				/*
				 * first and second case
				 * token_reserved will be when we have single table only.
				 * should AND,OR,NOT be part of reserved_tokens or operators ?
				 */
				table_name_list.push_back(current_token);

				lookup_element.table_name = current_token;
				lookup_table_list.push_back(lookup_element);

				//next_token may be reserved see if it triggers state change
				pushback_token_to_stream(&next_token, &index);
				continue;

			} else if (next_token == "AS" or next_token == "as") {
				// third case , then do one more lookahead
				table_name_list.push_back(current_token);

				//get next token
				next_token = get_next_valid_token(&queryStr, &index);
				if (!is_valid_tblcol_name(next_token)) {
					//this is bad
					std::cerr
							<< "Expected a valid <column_name> after AS before : "
							<< next_token << std::endl;
					return pRes;
				}
				//save the table_name and col_names
				lookup_element.table_name = current_token;
				lookup_element.alias_name = next_token;
				lookup_table_list.push_back(lookup_element);
				continue;
			} else {
				//fourth case
				table_name_list.push_back(current_token);
				lookup_element.table_name = current_token;
				lookup_element.alias_name = next_token;
				lookup_table_list.push_back(lookup_element);
			}

		} else if (current_state == WHERE) {
			/*
			 * what about queries where col name is referenced in a non-composite
			 * relationship ? for e.g 'select rollno from class where rollno >9'.
			 * since we are processing only queries where cols and tables will
			 * be referenced not selected, then we might not handle this case.
			 */
			// reject tokens that we might not need
			// for now we will reject any reserved keyword or operator
			if (!is_valid_tblcol_name(current_token)) {
				pushback_token_to_stream(&current_token, &index);
				continue;
			}

			next_token = "";
			next_token = get_next_valid_token(&queryStr, &index);
			if (next_token == ".") {
				//case where composite col name and table name will be found
				next_token = get_next_valid_token(&queryStr, &index);
				if (!is_valid_tblcol_name(next_token)) {
					//thats bad
					std::cerr << "Expected valid token after '.' near " << index
							<< std::endl;
					return pRes;
				}
				/*
				 * checks could be put here to ensure that next_token is a valid token
				 * as a matter of all places where we are doing lookahead, this
				 * should be checked.
				 */

				//current_token could be alias so lets get its table name
				table_name = find_table_name_of_alias_tblname(lookup_table_list,
						current_token);
				if (!is_valid_tblcol_name(table_name)) {
					//its an error -- will not happen since we will get back alias name
					// in cases where we dont find a suitable table_name for alias_name
				} else {
					store_table_name_uniquely(pRes->mTblNameList, table_name);
					std::string tmp = table_name + "." + next_token;
					store_table_col_name_uniquely(pRes->mTblColNameList, tmp);
				}
			} else {
				/*
				 *  =========== NON_STANDARD BEHAVIOUR ===========
				 * case where we have non-composite and possibly single col.
				 * When in WHERE clause more than one columns are referenced then
				 * they will always use '.' to denote table_name or table_name_alias
				 * with col_name . For.eg. select * from table1,table2 where table1.id >5 and table2.id <5;
				 * And when there are no colms with '.' separating the col and table_name
				 * then it means we have single table only.
				 *
				 * Therefore, this token is the col_name beglonging
				 * to this case.
				 *
				 * Resolving the ambiguity: IDB-4122
				 * ----------------------
				 *
				 * For single table case: all cols will be considered as referenced. However for
				 * multi-table non-composite columns , we will list them without any relationship.
				 */
				if (table_name_list.size() == 1) {
					/*
					 * case where we have single table name but may have
					 * mutliple cols.
					 */
					table_name = table_name_list.front();
					store_table_name_uniquely(pRes->mTblNameList, table_name);
					std::string tmp = table_name + "." + current_token;
					store_table_col_name_uniquely(pRes->mTblColNameList, tmp);
				} else {
					/*
					 * case where we have more than one tables --
					 * ambiguity IDB-4122
					 */
					for (std::list<std::string>::iterator it =
							table_name_list.begin();
							it != table_name_list.end(); it++) {
						store_table_name_uniquely(pRes->mTblNameList, *it);
					}

					std::string tmp = current_token;
					store_table_col_name_uniquely(pRes->mTblColNameList, tmp);
				}
			}
		}
	}
	return pRes;
}