/**
 * Load this collection from a file.
 *
 * The collection name becomes the file name with everything from its last
 * '.' onwards removed (the whole file name when it contains no '.').
 * The contents are then read through the stream extraction operator
 * defined for Collection.
 *
 * @param filename path of the collection file to read
 */
void Collection::read(mrs_string filename)
{
  // substr() with an npos count returns the entire string, so a file name
  // without any '.' is kept unchanged.
  const mrs_string::size_type lastDot = filename.rfind('.');
  name_ = filename.substr(0, lastDot);

  ifstream is(filename.c_str());
  is >> (*this);
}
/**
 * Split a delimiter-separated list of numbers and store the values in v.
 *
 * Each token between two delimiters (or between a delimiter and either end
 * of the string) is converted with atof() and written to v(0), v(1), ...
 * in order. An empty token, including the single token of an empty string,
 * yields 0.0 as atof() does.
 *
 * @param s text to parse, e.g. "0.5,1,2"
 * @param v destination vector; it is only indexed here, never resized
 *          (presumably the caller must size it beforehand - TODO confirm)
 * @param d delimiter character separating the tokens
 */
void Marsyas::string2parameters(mrs_string s, realvec &v, char d)
{
  mrs_natural i = 0;
  mrs_string::size_type pos = 0;

  for (;;)
  {
    const mrs_string::size_type delimPos = s.find(d, pos);
    const bool lastToken = (delimPos == mrs_string::npos);

    // substr() takes a LENGTH, not an end index: cut exactly the token so
    // that text following the delimiter can never leak into the conversion
    // (this matters when the delimiter itself may appear inside a number).
    const mrs_string::size_type count =
      lastToken ? mrs_string::npos : delimPos - pos;
    const mrs_string token = s.substr(pos, count);

    v(i++) = atof(token.c_str());

    if (lastToken)
    {
      break;
    }
    pos = delimPos + 1;
  }
}
// TimeLine::load - read a timeline (label) file into regions_.
//
// Parameters:
//   filename       - file to read; it is also stored in filename_.
//   lexicon_labels - comma-separated list of known label names. The number
//                    of ',' characters is used as the number of labels, so a
//                    name is only picked up when it is followed by a ','.
//                    The exact string "," means that no lexicon is given.
//
// Returns false when filename is empty or the file cannot be opened (the
// latter also emits a warning through MRSWARN). The ".txt" branch returns
// true after the file has been read and closed. Only the ".txt" branch is
// described here.
//
// ".txt" branch (marked "audacity label format" in the code):
//   * Every loop iteration extracts "start end label" from the stream and
//     appends one TimeRegion whose start/end are the values multiplied by
//     srate_.
//   * A label missing from the lexicon is added to the (sorted) label list
//     only when lexicon_labels still equals ",".
//   * After reading, every region's classId is set to the index of its name
//     in the sorted label list.
//   * The last region is dropped again because the eof()-controlled loop
//     runs one extra time on an empty last line.
//   * lineSize_ is set to 1 and size_ to the last read end value times srate_.
//
// NOTE(review): lexicon_labels is consumed while the lexicon is parsed (it is
// reassigned to the remainder on every iteration), so the later comparison
// against "," can only succeed when the caller passed exactly ",".
// NOTE(review): in the relabel loop the classId of -1 assigned for a name
// that is not found is immediately overwritten by the unconditional
// assignment that follows (there is no else), so such regions end up with
// the distance to labels.end() - confirm whether that is intended.
// NOTE(review): for a file without any readable line, 'end' is used
// uninitialized when size_ is computed.
bool TimeLine::load(mrs_string filename, mrs_string lexicon_labels) { ifstream in; filename_ = filename; if(filename == "") return false; in.open(filename.c_str()); if(!in.is_open()) { MRSWARN("TimeLine::load() - Problem opening file " << filename_); return false; } FileName f(filename); vector<mrs_string> labels; // Load lexicon dictionary if available mrs_string lexicon_label; mrs_string remainder; size_t nLabels; nLabels = std::count(lexicon_labels.begin(), lexicon_labels.end(), ','); if (lexicon_labels != ",") { for (size_t i=0; i < nLabels; i++) { lexicon_label = lexicon_labels.substr(0, lexicon_labels.find(",")); labels.push_back(lexicon_label); sort(labels.begin(), labels.end()); remainder = lexicon_labels.substr(lexicon_labels.find(",") + 1, lexicon_labels.length()); lexicon_labels = remainder; } } else nLabels = 0; if (f.ext() == "txt") // audacity label format { numRegions_ = 0; mrs_real start, end; mrs_string label; regions_.clear(); while (!in.eof()) { in >> start >> end >> label; TimeRegion region; region.start = (mrs_natural) (start * srate_); region.end = (mrs_natural) (end * srate_); region.classId = 1; region.name = label; mrs_bool label_found = false; for (unsigned int i=0; i < labels.size(); i++) { if (label == labels[i]) { label_found = true; region.classId = i; } } if (!label_found) { if (lexicon_labels == ",") { labels.push_back(label); sort(labels.begin(), labels.end()); } } regions_.push_back(region); numRegions_ ++; } // relabel classIds so that they correspond to sorted labels for (mrs_natural i=0; i < numRegions_; ++i) { mrs_string label = regions_[i].name; vector<mrs_string>::iterator it = find(labels.begin(), labels.end(), label); if (it == labels.end()) regions_[i].classId = (mrs_natural)-1; mrs_natural l = distance(labels.begin(), it); regions_[i].classId = l; } // last region is a duplicate due to empty last line // kind of a hack but works numRegions_ --; regions_.pop_back(); lineSize_ = 1; size_ = (mrs_natural) (end * srate_); 
in.close(); return true; }
/**
 * Start a new ARFF output file and write its header.
 *
 * The putHeader control is always set to true. The header itself is only
 * written when the filename control differs from the file that is currently
 * being written; in that case the previous output stream (if any) is closed
 * and released, a new file is opened, and the relation line, attribute
 * declarations, and "@data" marker are written.
 *
 * @param inObsNames comma-separated observation names; the leading
 *        (inObservations - 1) names are emitted as real-valued attributes
 */
void WekaSink::putHeader(mrs_string inObsNames)
{
  ctrl_putHeader_->setValue(true);

  // Still writing to the same file: there is nothing new to emit.
  const mrs_string requestedName = ctrl_filename_->to<mrs_string>();
  if (filename_ == requestedName)
  {
    return;
  }

  // Retire the stream that belonged to the previous file, if there was one.
  if (mos_ != NULL)
  {
    mos_->close();
    delete mos_;
    // TODO: do something about this ugly hack.
    // A previous output file with the name "weka.arff" is removed from disk.
    if (filename_ == "weka.arff")
    {
      remove(filename_.c_str());
    }
  }

  // Switch over to the new file.
  filename_ = requestedName;
  mos_ = new ofstream;
  mos_->open(filename_.c_str());
  ofstream &out = *mos_;

  // General header lines.
  out << "% Created by Marsyas" << endl;
  out << "@relation " << filename_ << endl;

  // The last input observation is assumed to carry the label, hence one
  // attribute less than there are observations.
  // TODO: why this assumption? What if a use case requires two labels per
  // feature vector or no labels? The WEKA ARFF format itself has no such
  // restriction.
  const mrs_natural attributeCount =
    ctrl_inObservations_->to<mrs_natural>() - 1;
  const mrs_natural labelCount = ctrl_nLabels_->to<mrs_natural>();

  // One "@attribute <name> real" line per feature. Names are peeled off the
  // front of the comma-separated list. Once no comma is left, find() yields
  // npos, so the name is the whole remainder and (npos + 1 == 0) the
  // remainder stays as it is.
  for (mrs_natural a = 0; a < attributeCount; ++a)
  {
    const mrs_string::size_type comma = inObsNames.find(",");
    const mrs_string attributeName = inObsNames.substr(0, comma);
    inObsNames = inObsNames.substr(comma + 1, inObsNames.length());
    out << "@attribute " << attributeName << " real" << endl;
  }

  // The output attribute: numeric for regression, otherwise the
  // comma-separated set of label names.
  if (ctrl_regression_->isTrue())
  {
    out << "@attribute output real" << endl;
  }
  else
  {
    out << "@attribute output {";
    for (mrs_natural k = 0; k < labelCount; ++k)
    {
      if (k > 0)
      {
        out << ",";
      }
      out << labelNames_[k];
    }
    out << "}" << endl;
  }

  // End of header, now we are ready for outputting the data.
  out << "\n\n@data" << endl;
}