bool AdClickPredictor::train() { // Obtain all the data files under the dataPath_ DIR *dp; struct dirent *dirp; if ( (dp = opendir(dataPath_.c_str())) == NULL ) { LOG(ERROR) << "opendir error: " << dataPath_ << std::endl; return false; } while ( (dirp = readdir(dp)) != NULL ) { if ( (strcmp(dirp->d_name, ".") == 0) || (strcmp(dirp->d_name, "..") == 0)) continue; if ( dirp->d_type == DT_DIR ) continue; std::string filename(dirp->d_name); LOG(INFO) << "Train AdClickPredictor from file: " << dirp->d_name << std::endl; //Training model with the current data file trainFromFile(filename); } return true; }
void Classifier::train(const std::vector<Document> &trainDocs) { std::ofstream data(TRAIN_FILE_NAME); int size = 0; std::vector<std::vector<ClassifiedTriple>> docs; for (const Document &d : trainDocs) { std::vector<ClassifiedTriple> triples = d.getClassifiedTriples(); size += triples.size(); docs.push_back(triples); } data << size <<" " << NUM_INP <<" " << NUM_OUTP<<"\n"; for (auto triples : docs) { writeClassifiedTriples(triples,data); } data.close(); trainFromFile(TRAIN_FILE_NAME); }