/**
 * Parses a file and collects every word and every hyperlink target it
 * contains, using Markdown-style "[anchor text](target)" link syntax.
 *
 * Rules implemented below:
 *  - A word is a maximal run of letters and/or digits; every other
 *    character acts as a separator. Words are inserted as they appear
 *    (no case folding).
 *  - Words inside "[...]" (the anchor text) are inserted into allWords
 *    exactly like words outside of brackets.
 *  - If the ']' that closes an anchor is immediately followed by
 *    "(target)", the raw text between the parentheses is inserted into
 *    allLinks as a single string and is NOT split into words.
 *  - Malformed input is tolerated: a '[' without a matching ']' simply
 *    treats the rest of the file as anchor text, and a '(' without a
 *    matching ')' is treated as ordinary text instead of a link.
 *
 * @param filename  path of the file to read; if it cannot be opened,
 *                  nothing is inserted into either set
 * @param allWords  set that receives every word found
 * @param allLinks  set that receives every link target found
 */
void MDPageParser::parse(std::string filename, MySet<std::string>& allWords, MySet<std::string>& allLinks){

	std::ifstream input_file(filename.c_str());// ifstream constructor takes char* NOT strings
	std::string contents;
	char ch;

	// Copy the entire file into a string. get() returns a stream that
	// evaluates to false at end-of-file (or if the file never opened).
	while(input_file.get(ch)){
		contents += ch;
	}

	const std::string::size_type len = contents.size();
	std::string::size_type i = 0;
	std::string word;
	bool inBrackets = false; // true between a '[' and its closing ']'

	while(i < len){
		// The <cctype> classifiers require a value representable as
		// unsigned char; passing a negative plain char is undefined.
		if( isalnum(static_cast<unsigned char>(contents[i])) ){
			// Consume the whole run of letters/digits as one word.
			word.clear();
			while( i < len && isalnum(static_cast<unsigned char>(contents[i])) ){
				word += contents[i];
				++i;
			}
			allWords.insert(word);
			// i now sits on the first character after the word, which
			// is examined by the next iteration (nothing is skipped).
		}
		else if( contents[i] == '[' ){
			inBrackets = true;
			++i; // skip '['
		}
		else if( inBrackets && contents[i] == ']' ){
			inBrackets = false;
			++i; // skip ']'

			// Only a '(' directly after the closing ']' starts a link.
			if( i < len && contents[i] == '(' ){
				const std::string::size_type close = contents.find(')', i);
				if( close != std::string::npos ){
					allLinks.insert( contents.substr(i + 1, close - i - 1) );
					i = close + 1; // continue right after ')'
				}
				// No ')' anywhere: leave i on '(' so the remaining text
				// is parsed as ordinary words instead of running off
				// the end of the buffer.
			}
		}
		else{
			++i; // any other character is just a separator
		}
	}

}
// Example #2
// 0
/**
 * Parses a file line by line, collecting lowercase words and hyperlink
 * targets written in Markdown-style "[anchor text](target)" syntax.
 *
 * Rules implemented below:
 *  - Every line is converted to lowercase before it is scanned.
 *  - A word is a maximal run of alphabetic characters; digits and all
 *    other characters act as separators.
 *  - A '(' directly preceded by ']' starts a link; the text up to the
 *    next ')' on the same line is inserted into allLinks as one string.
 *    If the line has no ')', the '(' is treated as a plain separator.
 *  - A word that runs up to the end of a line is inserted as well.
 *
 * NOTE(review): because the whole line is lowercased first, link targets
 * are stored lowercased too. This matches the previous behavior; confirm
 * it is intended if targets are case-sensitive file names.
 *
 * @param filename  path of the file to read; if it cannot be opened,
 *                  nothing is inserted into either set
 * @param allWords  set that receives every word found
 * @param allLinks  set that receives every link target found
 */
void MD::parse(std::string filename, MySet<std::string>& allWords, MySet<std::string>& allLinks) {
    std::ifstream file ( filename.c_str() );
    std::string line;

    while ( getline (file,line) ) {
        // Lowercase the whole line. tolower() requires a value that is
        // representable as unsigned char, hence the casts.
        for (size_t k = 0; k < line.length(); k++) {
            line[k] = static_cast<char>( tolower(static_cast<unsigned char>(line[k])) );
        }

        size_t word_start = 0; // index where the pending word begins
        size_t g = 0;
        while (g < line.length()) {
            if (isalpha(static_cast<unsigned char>(line[g]))) {
                g++; // still inside a word
                continue;
            }

            // A non-letter ends whatever word was pending.
            if (g > word_start) {
                allWords.insert( line.substr(word_start, g - word_start) );
            }

            // "](" introduces a link target. The g > 0 guard keeps
            // line[g-1] from being evaluated at the start of the line.
            if (line[g] == '(' && g > 0 && line[g-1] == ']') {
                const size_t close = line.find(')', g);
                if (close != std::string::npos) {
                    allLinks.insert( line.substr(g + 1, close - g - 1) );
                    g = close + 1; // resume right after ')'
                    word_start = g;
                    continue;
                }
                // Unterminated link: fall through and treat '(' as an
                // ordinary separator.
            }

            g++;
            word_start = g;
        }

        // getline() strips the newline, so a word touching the end of
        // the line has no trailing separator and must be flushed here.
        if (word_start < line.length()) {
            allWords.insert( line.substr(word_start) );
        }
    }
    // The ifstream closes itself when it goes out of scope.

}