/**
 * Populate this collection from the file at the given path.
 *
 * The collection name is set to the path with everything from its last '.'
 * onward removed (the whole path when it contains no '.'). The contents are
 * then read through this class's stream extraction operator.
 *
 * @param filename path of the collection file to read
 */
void Collection::read(mrs_string filename)
{
  // Strip the last extension to obtain the collection name.
  const mrs_string baseName = filename.substr(0, filename.rfind('.'));
  name_ = baseName;

  // Let operator>> do the actual parsing of the file contents.
  ifstream input(filename.c_str());
  input >> (*this);
}
/**
 * Convert a string representing a time to a number of samples, based on the
 * given sample rate.
 *
 * Format: "123.456#" where # is the time division. Valid time divisions:
 * { h, m, s, ms, us }. Example times: "10us", "10ms", "10s", "10m", "10h".
 * A value without a time division (e.g. "512") is returned unscaled, i.e. it
 * is taken to be a sample count already.
 *
 * @param time  textual time value, e.g. "1.5s"
 * @param srate sample rate, in samples per second
 * @return the resulting sample count converted to mrs_natural; 0 when time is
 *         empty; -1 on a format error (more than one decimal point, or an
 *         invalid time division).
 */
mrs_natural Marsyas::time2samples(mrs_string time, mrs_real srate)
{
  if (time == "")
  {
    return 0;
  }

  // Parse the leading decimal number ("123.456").
  mrs_real samples = 0;
  mrs_real divisor = 10.0;
  bool decimal_point = false;
  const int len = static_cast<int>(time.length());
  int i = 0;
  for (; i < len && (time[i] == '.' || (time[i] >= '0' && time[i] <= '9')); ++i)
  {
    if (time[i] == '.')
    {
      if (decimal_point)
      {
        return -1; // more than one decimal point
      }
      decimal_point = true;
    }
    else if (decimal_point)
    {
      // Fractional digit: each one is worth a tenth of the previous one.
      samples = samples + static_cast<mrs_real>(time[i] - '0') / divisor;
      divisor = divisor * 10.0;
    }
    else
    {
      samples = samples * 10.0 + (time[i] - '0');
    }
  }

  // Everything after the number must be exactly one known time division.
  if (i < len)
  {
    const mrs_string unit = time.substr(i);
    if (unit == "h") // hours
    {
      samples = 3600.0 * samples * srate;
    }
    else if (unit == "m") // minutes
    {
      samples = 60.0 * samples * srate;
    }
    else if (unit == "s") // seconds
    {
      samples = samples * srate;
    }
    else if (unit == "ms") // milli-seconds
    {
      samples = samples / 1000.0 * srate;
    }
    else if (unit == "us") // micro-seconds
    {
      samples = samples / 1000000.0 * srate;
    }
    else
    {
      return -1; // unknown time division or trailing garbage
    }
  }

  return static_cast<mrs_natural>(samples);
}
/**
 * Load time regions from a label file into this TimeLine.
 *
 * @param filename        path of the label file; it is stored in filename_
 *                        before any validation.
 * @param lexicon_labels  comma-separated list of known labels, each one
 *                        followed by a ',' (text after the last ',' is not
 *                        used as a label). The exact value "," means that no
 *                        lexicon is given; labels are then collected from
 *                        the file itself.
 * @return false when filename is empty or the file cannot be opened; true
 *         once a file with the "txt" extension has been parsed.
 */
bool TimeLine::load(mrs_string filename, mrs_string lexicon_labels)
{
  ifstream in;
  filename_ = filename;

  if(filename == "")
    return false;

  in.open(filename.c_str());
  if(!in.is_open())
  {
    MRSWARN("TimeLine::load() - Problem opening file " << filename_);
    return false;
  }

  FileName f(filename);
  vector<mrs_string> labels;

  // Load lexicon dictionary if available
  mrs_string lexicon_label;
  mrs_string remainder;
  size_t nLabels;

  // One label is expected in front of every ',' of the list.
  nLabels = std::count(lexicon_labels.begin(), lexicon_labels.end(), ',');

  if (lexicon_labels != ",")
  {
    for (size_t i=0; i < nLabels; i++)
    {
      // Peel the first label off the list; lexicon_labels is consumed as we
      // go and ends up holding only the text after the last ','.
      lexicon_label = lexicon_labels.substr(0, lexicon_labels.find(","));
      labels.push_back(lexicon_label);
      sort(labels.begin(), labels.end());
      remainder = lexicon_labels.substr(lexicon_labels.find(",") + 1, lexicon_labels.length());
      lexicon_labels = remainder;
    }
  }
  else
    nLabels = 0;

  if (f.ext() == "txt") // audacity label format
  {
    numRegions_ = 0;
    mrs_real start, end;
    mrs_string label;
    regions_.clear();

    // Each record is "<start> <end> <label>"; start and end are scaled by
    // srate_ to obtain sample positions.
    while (!in.eof())
    {
      in >> start >> end >> label;
      TimeRegion region;
      region.start = (mrs_natural) (start * srate_);
      region.end = (mrs_natural) (end * srate_);
      region.classId = 1;
      region.name = label;

      // Provisional class id: every region's classId is assigned again in
      // the relabel pass below.
      mrs_bool label_found = false;
      for (unsigned int i=0; i < labels.size(); i++)
      {
        if (label == labels[i])
        {
          label_found = true;
          region.classId = i;
        }
      }

      if (!label_found)
      {
        // New labels are only learned when no lexicon was supplied.
        if (lexicon_labels == ",")
        {
          labels.push_back(label);
          sort(labels.begin(), labels.end());
        }
      }

      regions_.push_back(region);
      numRegions_ ++;
    }

    // relabel classIds so that they correspond to sorted labels
    for (mrs_natural i=0; i < numRegions_; ++i)
    {
      mrs_string label = regions_[i].name;
      vector<mrs_string>::iterator it = find(labels.begin(), labels.end(), label);
      // NOTE(review): the -1 written here is overwritten by the assignment
      // two lines below, so a label missing from `labels` ends up with
      // classId == labels.size(). Confirm which value is intended.
      if (it == labels.end())
        regions_[i].classId = (mrs_natural)-1;
      mrs_natural l = distance(labels.begin(), it);
      regions_[i].classId = l;
    }

    // last region is a duplicate due to empty last line
    // kind of a hack but works
    // NOTE(review): this relies on the read loop running one extra time at
    // the end of the file; presumably a file without trailing whitespace
    // would lose its real last region here. Verify.
    numRegions_ --;
    regions_.pop_back();

    lineSize_ = 1;
    // `end` still holds the end time of the last record read.
    size_ = (mrs_natural) (end * srate_);

    in.close();
    return true;
  }
/**
 * Write the ARFF header to the output file named by the filename control.
 *
 * The putHeader control is always set to true. The header itself is written
 * only when the filename control differs from the file currently being
 * written to; in that case any previously opened output stream is closed and
 * deleted, and a new file is opened.
 *
 * @param inObsNames comma-separated observation names; one name is consumed
 *                   for each attribute written.
 */
void WekaSink::putHeader(mrs_string inObsNames)
{
  ctrl_putHeader_->setValue(true);

  // Nothing to do unless we are switching to a different output file.
  const mrs_string requestedName = ctrl_filename_->to<mrs_string>();
  if (filename_ == requestedName)
  {
    return;
  }

  // Close the previously used output file if needed and clean up.
  if (mos_ != NULL)
  {
    mos_->close();
    delete mos_;
    // TODO: do something about this ugly hack.
    if (filename_ == "weka.arff")
    {
      remove(filename_.c_str());
    }
  }

  // Switch to the new file and open a fresh output stream for it.
  filename_ = requestedName;
  mos_ = new ofstream;
  mos_->open(filename_.c_str());

  // General header stuff.
  (*mos_) << "% Created by Marsyas" << endl;
  (*mos_) << "@relation " << filename_ << endl;

  // The last input observation is assumed to carry the label, hence one
  // attribute less than the number of input observations.
  // TODO: why this assumption? A use case may need two labels per feature
  // vector, or none; the ARFF format itself makes no such assumption.
  const mrs_natural nAttributes = ctrl_inObservations_->to<mrs_natural>() - 1;
  const mrs_natural nLabels = ctrl_nLabels_->to<mrs_natural>();

  // Print the attribute names, consuming inObsNames one name at a time.
  // TODO: a 'split()' helper would make this more elegant.
  for (mrs_natural attr = 0; attr < nAttributes; ++attr)
  {
    const mrs_string attrName = inObsNames.substr(0, inObsNames.find(","));
    inObsNames = inObsNames.substr(inObsNames.find(",") + 1, inObsNames.length());
    (*mos_) << "@attribute " << attrName << " real" << endl;
  }

  // The attribute for the label.
  if (ctrl_regression_->isTrue())
  {
    (*mos_) << "@attribute output real" << endl;
  }
  else
  {
    // Nominal output: the label names joined with commas,
    // e.g. "@attribute output {music,speech}".
    (*mos_) << "@attribute output {";
    for (mrs_natural label = 0; label < nLabels; ++label)
    {
      if (label > 0)
      {
        (*mos_) << ",";
      }
      (*mos_) << labelNames_[label];
    }
    (*mos_) << "}" << endl;
  }

  // End of header, now we are ready for outputting the data.
  (*mos_) << "\n\n@data" << endl;
}