// Read the module file `filename` and return its contents as a string.
//
// The file is opened in "r" mode and read into a byte_array. If it starts
// with a shebang ("#!"), those two bytes are replaced by "//" so the first
// line becomes a comment and source line numbers stay correct.
//
// The leading run of "//" comment lines is then scanned for:
//   * a coding line (only within the first two comment lines), which selects
//     the encoding used for the final conversion (default "UTF-8");
//   * option lines of the form "name: value", which are stored as properties
//     on `opts` when `opts` is supplied.
// Scanning stops at the first non-comment line, at an empty comment line, or
// at a final line that is not newline-terminated.
//
// Afterwards the "flusspferd" and "warnings" options, if they are strings,
// are replaced by the result of split_args_string().
//
// Returns encodings::convert_to_string(encoding, blob).
string require::load_module_text(fs::path filename, boost::optional<object> opts) {
  root_string read_only("r");

  io::file &f = create<io::file>(
    fusion::vector2<char const*, string>(filename.string().c_str(), read_only));
  root_object f_o(f);

  // buffer blob
  byte_array &blob = create<byte_array>(
    fusion::vector2<binary::element_type*, std::size_t>(0, 0));
  root_object b_o(blob);
  binary::vector_type &buf = blob.get_data();

  // Look for a shebang line. The file may hold fewer than two bytes, so the
  // size has to be checked before indexing into the buffer.
  f.read_binary(2, blob);
  if (buf.size() >= 2 && buf[0] == '#' && buf[1] == '!') {
    // Shebang line - skip the line, but insert a comment line here to keep
    // source line numbers right
    buf.clear();
    buf.push_back('/');
    buf.push_back('/');
  }

  // NOTE(review): presumably this appends the rest of the file to what is
  // already in the blob - confirm against io::file.
  f.read_whole_binary(blob);

  // Look for coding and option lines. A coding line looks like one of
  //   "// -*- coding:utf-8 -*-"
  //   "// vim:fileencoding=utf-8:"
  //
  // An option line looks like
  //   "// flusspferd: -xboo"
  //
  // We continue looking until we see a blank comment or a non comment line
  using namespace boost::xpressive;

  sregex opt_re = sregex::compile("^\\s*([-\\w.]+):\\s*(.*)$");
  sregex coding_re = sregex::compile("^.*coding[:=]\\s*([-\\w.]+)");
  sregex empty_line_re = bos >> *_s >> eos;

  // We only want to look for a coding comment on line 1 or 2
  int look_for_coding = 2;
  std::string encoding = "UTF-8";

  // The buffer is not modified while scanning, so the end iterator is stable.
  binary::vector_type::iterator const end = buf.end();

  for (binary::vector_type::iterator i = buf.begin(); i != end; ++i) {
    // A comment line needs at least the two bytes "//"; check the remaining
    // length first so we never dereference the end iterator.
    if (end - i < 2 || i[0] != '/' || i[1] != '/') {
      // Not a comment line - stop!
      break;
    }
    i += 2;

    binary::vector_type::iterator e = std::find(i, end, '\n');
    if (e == end)
      break;

    // Text of the comment, without the leading "//" and the trailing newline.
    std::string line(i, e);

    // Move onto next line (the loop increment steps over the '\n')
    i = e;

    smatch m;

    if (look_for_coding > 0) {
      // Guarding on "> 0" keeps the counter from going negative, which would
      // otherwise read as true again and re-enable the search further down.
      --look_for_coding;

      // regex_search rather than regex_match: a coding line usually has
      // trailing text after the encoding name (" -*-", ":"), so requiring the
      // whole line to match would reject the documented forms.
      if (regex_search(line, m, coding_re)) {
        // Huzzah! We have an encoding!
        encoding = m[1].str();
        look_for_coding = 0;
        continue;
      }
    }

    // Empty comment line - stop looking
    if (regex_match(line, empty_line_re))
      break;

    if (opts && regex_match(line, m, opt_re)) {
      // A line we are interested in, and we have somewhere to store the result
      opts->set_property(m[1].str(), m[2].str());
    }
  }

  // If we have "flusspferd" or "warnings" in the option, split them on
  // whitespace like shells do. TODO: Should we just split everything?
  if (opts) {
    value v = opts->get_property("flusspferd");
    if (v.is_string()) {
      opts->set_property("flusspferd", split_args_string(v));
    }

    v = opts->get_property("warnings");
    if (v.is_string()) {
      opts->set_property("warnings", split_args_string(v));
    }
  }

  return encodings::convert_to_string(encoding, blob);
}