/**
 * @brief Parses the wiki markup file behind a document, writes its
 *        "semwiki" summary file and hands the raw tokens to the document.
 *
 * The function performs these steps in order:
 *  - derives the article title from the document's resource name (file name
 *    without directory, cut at the first ".txt");
 *  - reads the file line by line, joining the lines with a single space, and
 *    records the contents of any "<<Author>>" and "<<Timestamp>>" lines;
 *  - runs parse() on the joined contents;
 *  - writes "<semwiki_dir_path>/<docID>_semwiki.txt" with the id, title,
 *    authors, timestamp, infobox, sections, links and categories;
 *  - assigns ids to unseen authors/categories and appends them to
 *    doc._author_ids / doc._category_ids;
 *  - tokenizes the joined contents on whitespace, registers unseen tokens in
 *    CDocument::_raw_token_id_map, adds every token to @p doc and finally calls
 *    doc.process_token_list().
 *
 * If the input file cannot be opened a message is printed and processing
 * continues with empty contents.
 *
 * @param doc Document to parse; supplies the resource name and document id and
 *            receives author ids, category ids and tokens.
 * @return The result of parse() on the file contents.
 */
bool CWikiMarkupParser::parse_document(CDocument& doc) {
	cout << "Parsing Wiki markup document using a state based tag parser" << endl;

	const string filename = doc.get_resource_name();
	const int doc_id = doc.get_docID();

	// Output file name: <semwiki_dir_path>/<docID>_semwiki.txt
	ostringstream id_text;
	id_text << doc_id;
	std::string path;
	path = CUtilities::semwiki_dir_path + "/";
	const string sem_wiki_fname = path + id_text.str() + "_semwiki.txt";

	_semwiki_wiki_id = doc_id;

	// Article title: strip the directory part, then cut at the first ".txt".
	// When ".txt" is absent find() yields npos and substr() keeps everything.
	const string::size_type slash = filename.find_last_of('/');
	const string base_name = (slash == string::npos) ? filename : filename.substr(slash + 1);
	_article_title = base_name.substr(0, base_name.find(".txt"));

	// Read the whole file, joining lines with a single space.
	string file_contents;
	ifstream ifs(filename.c_str());
	if ( ifs.is_open() )
	{
		string line;
		// Loop on getline() itself: testing eof() before reading would process
		// a phantom empty line at the end and spin forever on a read error.
		while ( getline( ifs, line ) )
		{
			file_contents += line;
			file_contents += ' ';

			if ( line.find("<<Author>>") != string::npos )
			{
				// Keep only what follows the first ':' on the line.
				string aut;
				istringstream liness(line);
				getline(liness, aut, ':');
				getline(liness, aut, '\n');

				// Split on ',' and store every piece (pieces are not trimmed).
				string::size_type start = 0;
				string::size_type comma = aut.find(',');
				while ( comma != string::npos )
				{
					_authors.insert(aut.substr(start, comma - start));
					start = comma + 1;
					comma = aut.find(',', start);
				}
				_authors.insert(aut.substr(start));
			}

			if ( line.find("<<Timestamp>>") != string::npos )
			{
				// Everything after the first ':' is the timestamp.
				string timestp;
				istringstream liness(line);
				getline(liness, timestp, ':');
				getline(liness, timestp, '\n');
				_timestamp = timestp;
			}
		}
	}
	else
	{
		cout << "Couldn't open file " << filename << endl;
	}
	ifs.close();

	// start parsing the file_contents
	cout << "file contents size = " << file_contents.size() << endl;
	const bool status = parse(file_contents);

	// Write the semwiki summary file, replacing any previous version.
	ofstream ofs(sem_wiki_fname.c_str(), ios::trunc);
	if ( !ofs.is_open() )
	{
		// The id bookkeeping below must still run, so only report the failure.
		cout << "Couldn't open file " << sem_wiki_fname << endl;
	}

	ofs << "<<#WikiFileId>>\n";
	ofs << _semwiki_wiki_id << "\n";
	ofs << "<<#Article Title>>\n";
	ofs << _article_title << "\n";

	ofs << "<<#Author>>\n";
	std::set<std::string>::iterator ait;
	for ( ait = _authors.begin(); ait != _authors.end(); ++ait )
	{
		// First time this author is seen: give it the next id.
		if ( _author_map.find(*ait) == _author_map.end() )
			_author_map[*ait] = ++_author_id;
		ofs << (*ait) << " $ ";
		doc._author_ids.push_back(_author_map[*ait]);
	}

	ofs << "\n<<#Timestamp>>\n";
	ofs << _timestamp << "\n";

	ofs << "<<#Infobox>>\n";
	set<string>::iterator i_itr;
	for ( i_itr = _infobox_details.begin(); i_itr != _infobox_details.end(); ++i_itr )
	{
		ofs << *i_itr;
	}

	ofs << "\n<<#Sections>>\n";
	// Headers and details are walked in lockstep; the shorter list ends the loop.
	list<string>::iterator sec_itr = _section_header.begin();
	list<string>::iterator sec_det_itr = _section_details.begin();
	for ( ; sec_itr != _section_header.end() && sec_det_itr != _section_details.end(); ++sec_itr, ++sec_det_itr )
	{
		ofs << *sec_itr << "  $";
		ofs << (*sec_det_itr);
		ofs << "\n";
	}

	ofs << "<<#LINKS>>" << "\n";
	set<string>::iterator s_itr;
	for ( s_itr = _outgoing_link_set.begin(); s_itr != _outgoing_link_set.end(); ++s_itr )
	{
		const string link_entry = *s_itr + " $";
		ofs << link_entry;
		// NOTE(review): the stored value keeps the " $" suffix, as before - confirm
		// that consumers of _link_set_map expect it.
		_link_set_map.insert(pair<std::string,std::string>(_article_title, link_entry));
	}

	ofs << "\n<<#Categories>>\n";
	set<string>::iterator cat;
	for ( cat = _categories.begin(); cat != _categories.end(); ++cat )
	{
		// First time this category is seen: give it the next id.
		if ( _category_map.find(*cat) == _category_map.end() )
			_category_map[*cat] = ++_category_id;
		ofs << *cat << " $ ";
		doc._category_ids.push_back(_category_map[*cat]);
	}
	ofs.close();

	// Reset the per-document state collected while reading the file.
	_authors.clear();
	_categories.clear();
	_timestamp = "";

	// Split the raw contents on whitespace and hand every token to the document.
	vector<string> token_vec;
	string delim = "\r\n\t ";
	CUtilities::tokenize(file_contents, token_vec, delim);

	vector<std::string>::iterator raw_token_it;
	for ( raw_token_it = token_vec.begin(); raw_token_it != token_vec.end(); ++raw_token_it )
	{
		// Unseen raw tokens get the next global raw-token id.
		if ( CDocument::_raw_token_id_map.find(*raw_token_it) == CDocument::_raw_token_id_map.end() )
			CDocument::_raw_token_id_map[*raw_token_it] = CDocument::_raw_token_id++;
		doc.add_token(*raw_token_it);
	}

	// Automatic storage: the processor used to be heap-allocated and never freed.
	CBasicTokenProcessor cbtp;
	doc.process_token_list(cbtp, doc);

	return status;
}