/** * @brief A wrapper routine for get_next_token(). Reads next token from query * using get_next_token() and further ensures that the token read is a * valid one. * @param query The query from where next token will be read. * @param index The index in the passed query from where next token will be * scanned. * @return The token that we have just read. */ std::string get_next_valid_token(std::string *query, unsigned int* index) { std::string current_token = get_next_token(query, index); std::string tmp = ""; //check if this token is a valid one while (!is_valid_token(current_token) and *index < query->length()) { current_token = get_next_token(query, index); } //how did we get out ? was it because we got a valid token or because end // of query reched ? if (*index >= query->length() and !is_valid_token(current_token)) { return ""; } // remove tokens enclosed in backticks if any if (current_token[0] == '`') { sanitize_token(¤t_token); } // if token is CONCAT then eat out everything until a ')' is found if (convert_to_uppercase(current_token) == "CONCAT") { current_token = get_next_token(query, index); while (current_token != ")" and *index < query->length()) { current_token = get_next_token(query, index); } if (*index < query->length()) { //this is bad as even though closing ')' have been encountered // we still need a valid token return ""; } /* * if we are here then current_token is ")" now read next_token * so far we have only eaten CONCAT block. it's not for sure that next * block is a valid one. * is this recursive block correct ? */ current_token = get_next_valid_token(query, index); } else if (convert_to_uppercase(current_token) == "MAX") { /* * MAx(coulmn_name) is a keyword that gives a column name in round * brackets. 
*/ tmp = get_next_token(query, index); //must be a ( if (tmp != "(") { std::cerr << "No '(' after MAX at pos: " << *index << std::endl; return ""; } //now read the actual col name current_token = get_next_token(query, index); //bypass the closing ')' tmp = get_next_token(query, index); //must be a ( if (tmp != ")") { std::cerr << "No ')' after MAX at pos: " << *index << std::endl; return ""; } } return current_token; }
/**
 * @brief Tell whether a token is one of the reserved SQL keywords known to
 *        this parser (most importantly the ones that can trigger a state
 *        change).
 * @param token Token which is to be checked; the comparison is done on its
 *              upper-cased form, so the check is case-insensitive.
 * @return true when the token matches an entry of the keyword table,
 *         false otherwise.
 */
bool is_token_reserved(std::string token) {
    // table of reserved keywords, all spelled in upper case
    static const char *const reserved_words[] = {
        "SELECT", "FROM",  "WHERE", "GROUP", "BY",    "HAVING", "AND",
        "OR",     "NOT",   "INNER", "OUTER", "ON",    "JOIN",   "ORDER",
        "LIMIT",  "ASC",   "DESC",  "ALL",   "LEFT",  "RIGHT",  "UNION",
        "LIKE",   "MAX",   "IN",    "IS",    "NULL",  "NOW"
    };
    const unsigned int reserved_count =
        sizeof(reserved_words) / sizeof(reserved_words[0]);

    // keywords are always compared against the upper-cased token
    const std::string candidate = convert_to_uppercase(token);

    // walk the table until a match is found or the table is exhausted
    unsigned int pos = 0;
    while (pos < reserved_count and !(candidate == reserved_words[pos])) {
        ++pos;
    }
    return pos < reserved_count;
}
/** * @brief Sets the state depending upon token scanned in the input. And returns * true/false indicating whether the state changed or not. True meaning * state changed while false otherwise. * @param token Token which will be examined. * @param current_state The current state of the program * @param previous_state The previous state of the program. * @return Returns true/false indicating whether current token triggered a state * change. */ bool set_state(std::string &token, token_state_t *current_state, token_state_t *previous_state) { std::string token_in_cap = convert_to_uppercase(token); int cur_state = *current_state; if (token_in_cap == "SELECT" or token_in_cap == "UNION") { /* * if state is SELECT then both states will reset * UNION will any way be followed by a SELECT so action is same */ *previous_state = *current_state = SELECT; state_reset_needed = true; return true; } else if (token_in_cap == "FROM" or token_in_cap == "JOIN") { /* * JOIN and FROM lists table names: * select ss.secondaryKeyword FROM site s INNER JOIN site_state st */ *previous_state = *current_state; *current_state = FROM; } else if (token_in_cap == "WHERE" or token_in_cap == "ON" or token_in_cap == "BY") { /* * ON| WHERE| BY: give columns names usually in composite manner * BY is part of 'ORDER BY' clause * ..JOIN site_seo ss ON ss.siteId = s.siteId LEFT JOIN site_noalert na ON na.site = s.siteId */ *previous_state = *current_state; *current_state = WHERE; } if (*current_state == cur_state) { return false; } return true; }
/** * @brief * Function to open a specific file mentioned in the path * based on the incoming modes * @param file_path * Pointer to the location of the file path string * @param flags * Flags indicating the modes in which the file is to be opened * * @return Non-zero:File descriptor of the opened file Zero :File open is unsuccessful */ int file_open(const char *file_path,int flags) { const char *path = file_path; const char *temp_path,*delim_strt; char shrt_file_name[SHRT_FILE_NAME_LEN]; char long_file_name[LONG_FILE_NAME_LEN]; int len = 0,fl_des = 0,crt_flag,i; int delim_cnt = 0; int mode; int extn_len_cnt = 0; int seq_num = 1; bool is_file_found; dir_entry *entry = NULL; file_info *info; u8 *pwd = root_directory; u32 strt_cluster = rt_dir_strt_clus; bool is_long_file_name = false; sw_memset(long_file_name,SPACE_VAL,LONG_FILE_NAME_LEN); delim_cnt = find_depth(file_path); path = file_path; for(i=0;i<delim_cnt;i++){ if(*path == DELIMITER){ delim_strt = path; path++; } while((*path != EXTN_DELIMITER) && (*path != '\0') && (*path != DELIMITER) && (len < LONG_FILE_NAME_LEN)){ long_file_name[len] = *path; path++; len++; } temp_path = path; if(*temp_path == EXTN_DELIMITER){ temp_path++; while(*temp_path != DELIMITER && *temp_path != '\0'){ extn_len_cnt++; temp_path++; } } if(len > FILE_NAME_SHRT_LEN || extn_len_cnt > FILE_NAME_EXTN_LEN) is_long_file_name = true; if(is_long_file_name){ path = delim_strt; len = 0; if(*path == DELIMITER) path++; while(len < LONG_FILE_NAME_LEN && *path != '\0' && *path != DELIMITER){ long_file_name[len] = *path; path++; len++; } long_file_name[len] = '\0'; if(entry){ sw_free(entry); entry = NULL; } is_file_found = get_dir_entry(long_file_name,&entry, pwd,strt_cluster,true); } else{ len = FILE_NAME_SHRT_LEN; while(len < SHRT_FILE_NAME_LEN && *path != '\0' && *path != DELIMITER){ if(*path == EXTN_DELIMITER) path++; long_file_name[len] = *path; path++; len++; } convert_to_uppercase(long_file_name); if(entry){ sw_free(entry); entry = NULL; } 
is_file_found = get_dir_entry(long_file_name,&entry, pwd,strt_cluster,false); } if((is_file_found) & (i != delim_cnt - 1)){ strt_cluster = (entry->strt_clus_hword)<<16 | (entry->strt_clus_lword); pwd = cluster_to_memory_addr(strt_cluster); len = 0; extn_len_cnt = 0; sw_memset(shrt_file_name,SPACE_VAL,SHRT_FILE_NAME_LEN); sw_memset(long_file_name,SPACE_VAL,LONG_FILE_NAME_LEN); is_long_file_name = false; } } if(is_file_found){ if(flags & FILE_WRITE){ if(chk_file_lock(file_path) == -1) flags = FILE_READ; if(entry->attr & ATTR_READ){ sw_printf("Cannot open the file in write mode\n"); return -1; } } info = (file_info*)sw_malloc(sizeof(file_info)); fl_des = retrieve_file_info(info,entry,flags, dir_file_offset,file_path); } else{ if((flags & FILE_CREATE_NEW) || (flags & FILE_CREATE_ALWAYS) || (flags & FILE_WRITE)){ if(is_long_file_name){ get_short_file_name(long_file_name,shrt_file_name, (char)seq_num); if(get_dir_entry(shrt_file_name,NULL, pwd,strt_cluster,false) == true){ while(get_dir_entry(shrt_file_name,NULL, pwd,strt_cluster,false)){ seq_num++; get_short_file_name(long_file_name, shrt_file_name,'seq_num'); } } convert_to_uppercase(shrt_file_name); crt_flag = create_file(long_file_name, shrt_file_name,strt_cluster,&entry); } else crt_flag = create_file(NULL,long_file_name,strt_cluster,&entry); if(crt_flag == 0) sw_printf("File creation success\n"); info = (file_info*)sw_malloc(sizeof(file_info)); fl_des = retrieve_file_info(info,entry,flags, dir_file_offset,file_path); } else return -1; } return fl_des; }
/** * @brief The main routine which accepts a SQL query and returns a list of type * TblColList which will contain list of all table and column names * referenced in the given query. * @param queryStr The query which is to be looked into. * @return A list of results. */ struct TblColList* ProcessQuery(std::string queryStr) { std::string current_token, previous_token, next_token; unsigned int index = 0; std::list<std::string> table_name_list; //store list of tables in current state // a SELECT/UNION will reset it. token_state_t current_state = NONE, previous_state = NONE; std::string table_name, col_name; std::list<lookup_table_for_name_alias_t> lookup_table_list; lookup_table_for_name_alias_t lookup_element; /* * we will use stack where we will save the table_name_list the moment * we encounter a opening round bracket. We dont need to save pRes as * this stores relationship already established between column name and tables. * This is in a way immutable once we the values have been stored. values * such as current_token, next_token and index are all either changing and thus * have state for that iteration only or their linear growth is valid even * in a subquery (for index). * */ struct query_state_t query_state; std::stack<struct query_state_t> query_state_stack; struct TblColList *pRes = new TblColList; while (index < queryStr.length()) { col_name = ""; current_token = ""; current_token = get_next_valid_token(&queryStr, &index); //have we reached end of stream if (current_token == "") { //no matter what we must end processing. How could we get an empty token ? return pRes; } if (current_token == "(") { /* * when a opening '(' is encountered in the stream, it will not * necessarily mean beginning of a sub-query. It can involve expressions * like: * ..from coupon c where c.id=5 and (c.roll>9) * If we do a state save the moment we encounter "(" then alias c cant be * looked up therefore we will do a state save only when we encounter * SELECT after '(' . 
* * Also when we encounter a '(' NOT followed by a select then we will * do the state save but we will not reset the state. So we will push * current state the moment we find '(' but we will reset the state only * when the next token is SELECT. * * Another option could have been that we save state only when '(' is * followed by SELECT. But when we encounter the closing bracket ')' * what do we pop off the stack ? * (Well it could be simple pop if u can otherwise ignore. It would only * mean that the opening ( for this closing ) did not mark a subquery.) * Not a good option since for mal-formed query if there are misplaced * closing brackets then wrong state will get popped off at wrong time . * (optimal but does not work) */ next_token = ""; next_token = get_next_valid_token(&queryStr, &index); //create an empty query_state variable query_state.current_state = NONE; query_state.previous_state = NONE; query_state.table_name_list.clear(); query_state.select_triggered_query_state_change = false; //save the state query_state.current_state = current_state; query_state.previous_state = previous_state; query_state.table_name_list = table_name_list; //state save only when SELECT is the next token if (convert_to_uppercase(next_token) == "SELECT") { query_state.select_triggered_query_state_change = true; query_state_stack.push(query_state); //also reset the state table_name_list.clear(); current_state = NONE; previous_state = NONE; } //also push back this token pushback_token_to_stream(&next_token, &index); continue; } if (current_token == ")") { //now time to pop back what we stored in stack if (query_state_stack.empty()) { continue; } if (query_state_stack.top().select_triggered_query_state_change == true) { /* * if the state saved at stack was triggered by SELECT then only * do a state save and pop * pop and save state into current variables */ table_name_list = (query_state_stack.top().table_name_list); current_state = (query_state_stack.top()).current_state; 
previous_state = (query_state_stack.top()).previous_state; //pop the top query_state_stack.pop(); } continue; } // see if this token triggers a state change if (set_state(current_token, ¤t_state, &previous_state) == true) { continue; } /* * if a state reset needed because keyword SELECT/UNION has been * encountered in the input stream, then perform a state reset. After * resetting table_name_list toggle state_reset_needed flag. */ if (state_reset_needed) { table_name_list.clear(); toggle_state_reset(); } /* * when being in a state, if a reserved token is encountered and * if code at that position can not handle it then it must do * a pushback followed a continue. We will handle reserved keywords * or unhandled tokens here. * Right now we are not handling most of the reserved tokens or operators * in this block. so continue */ if (is_token_reserved(current_token) or is_token_operator(current_token)) { // deal with token continue; } if (current_token == "," or current_token == ";") { continue; } //process state SELECT if (current_state == SELECT) { } // process state FROM if (current_state == FROM) { /* * we can look for columns in FROM state * 1. .. select name,roll from table_t1,table_t2 * // table_names separated by comma * 2. .. select name,roll from table1 where roll>9 * // table names followed by reserved keywords * 3. .. select a,b,c from table1 as t1,table2 as t2 * // table_name with alias_name separated by 'AS' * 4. .. select t1.name,t2.roll from table1 t1,table2 t2 * // table_name with alias separated by space */ lookup_element.alias_name = ""; lookup_element.table_name = ""; if (!is_valid_tblcol_name(current_token)) { pushback_token_to_stream(¤t_token, &index); continue; } next_token = ""; next_token = get_next_valid_token(&queryStr, &index); /* * Next token can be 'AS' or an alias name. 
For all other values * of next_tokens, it must be pushed back to stream */ if ((is_valid_tblcol_name(next_token) == false)) { /* * first and second case * token_reserved will be when we have single table only. * should AND,OR,NOT be part of reserved_tokens or operators ? */ table_name_list.push_back(current_token); lookup_element.table_name = current_token; lookup_table_list.push_back(lookup_element); //next_token may be reserved see if it triggers state change pushback_token_to_stream(&next_token, &index); continue; } else if (next_token == "AS" or next_token == "as") { // third case , then do one more lookahead table_name_list.push_back(current_token); //get next token next_token = get_next_valid_token(&queryStr, &index); if (!is_valid_tblcol_name(next_token)) { //this is bad std::cerr << "Expected a valid <column_name> after AS before : " << next_token << std::endl; return pRes; } //save the table_name and col_names lookup_element.table_name = current_token; lookup_element.alias_name = next_token; lookup_table_list.push_back(lookup_element); continue; } else { //fourth case table_name_list.push_back(current_token); lookup_element.table_name = current_token; lookup_element.alias_name = next_token; lookup_table_list.push_back(lookup_element); } } else if (current_state == WHERE) { /* * what about queries where col name is referenced in a non-composite * relationship ? for e.g 'select rollno from class where rollno >9'. * since we are processing only queries where cols and tables will * be referenced not selected, then we might not handle this case. 
*/ // reject tokens that we might not need // for now we will reject any reserved keyword or operator if (!is_valid_tblcol_name(current_token)) { pushback_token_to_stream(¤t_token, &index); continue; } next_token = ""; next_token = get_next_valid_token(&queryStr, &index); if (next_token == ".") { //case where composite col name and table name will be found next_token = get_next_valid_token(&queryStr, &index); if (!is_valid_tblcol_name(next_token)) { //thats bad std::cerr << "Expected valid token after '.' near " << index << std::endl; return pRes; } /* * checks could be put here to ensure that next_token is a valid token * as a matter of all places where we are doing lookahead, this * should be checked. */ //current_token could be alias so lets get its table name table_name = find_table_name_of_alias_tblname(lookup_table_list, current_token); if (!is_valid_tblcol_name(table_name)) { //its an error -- will not happen since we will get back alias name // in cases where we dont find a suitable table_name for alias_name } else { store_table_name_uniquely(pRes->mTblNameList, table_name); std::string tmp = table_name + "." + next_token; store_table_col_name_uniquely(pRes->mTblColNameList, tmp); } } else { /* * =========== NON_STANDARD BEHAVIOUR =========== * case where we have non-composite and possibly single col. * When in WHERE clause more than one columns are referenced then * they will always use '.' to denote table_name or table_name_alias * with col_name . For.eg. select * from table1,table2 where table1.id >5 and table2.id <5; * And when there are no colms with '.' separating the col and table_name * then it means we have single table only. * * Therefore, this token is the col_name beglonging * to this case. * * Resolving the ambiguity: IDB-4122 * ---------------------- * * For single table case: all cols will be considered as referenced. However for * multi-table non-composite columns , we will list them without any relationship. 
*/ if (table_name_list.size() == 1) { /* * case where we have single table name but may have * mutliple cols. */ table_name = table_name_list.front(); store_table_name_uniquely(pRes->mTblNameList, table_name); std::string tmp = table_name + "." + current_token; store_table_col_name_uniquely(pRes->mTblColNameList, tmp); } else { /* * case where we have more than one tables -- * ambiguity IDB-4122 */ for (std::list<std::string>::iterator it = table_name_list.begin(); it != table_name_list.end(); it++) { store_table_name_uniquely(pRes->mTblNameList, *it); } std::string tmp = current_token; store_table_col_name_uniquely(pRes->mTblColNameList, tmp); } } } } return pRes; }