/*
 * Advance self->source_pos past `offset` countable lines of self->source.
 *
 * A line is counted when its terminator ('\n', '\r' or "\r\n") is reached,
 * it was not flagged as a comment line, and it held at least one significant
 * character. A character is significant when it is neither a space nor a
 * tab, or unconditionally when self->strip_whitespace_lines is zero.
 *
 * A line is flagged as a comment when its first significant character
 * equals self->comment (and self->comment is non-zero). Unless `header` is
 * set, the characters following the comment marker are handed to
 * push_comment() and the line end is reported through end_comment().
 *
 * Result (delivered through the RETURN macro):
 *   INVALID_LINE - the input ran out while `header` was set
 *   NO_ERROR     - the input ran out with `header` clear, or `offset`
 *                  lines were skipped
 */
int skip_lines(tokenizer_t *self, int offset, int header)
{
    int seen_chars = 0;  /* significant characters seen on the current line */
    int in_comment = 0;  /* non-zero once the current line is a comment */
    int lines_done = 0;  /* countable lines skipped so far */
    char ch;

    for (; lines_done < offset; ++self->source_pos)
    {
        if (self->source_pos >= self->source_len)
        {
            /* Ran out of input before skipping enough lines */
            if (header)
            {
                RETURN(INVALID_LINE); /* a header line is mandatory */
            }
            RETURN(NO_ERROR);         /* simply no data in the input */
        }

        ch = self->source[self->source_pos];

        if (ch == '\r' || ch == '\n')
        {
            /* Consume the '\n' of a "\r\n" pair together with the '\r' */
            if (ch == '\r'
                && self->source_pos < self->source_len - 1
                && self->source[self->source_pos + 1] == '\n')
            {
                ++self->source_pos;
            }

            if (in_comment)
            {
                if (!header)
                    end_comment(self);
            }
            else if (seen_chars > 0)
            {
                ++lines_done;
            }

            /* The next line starts out empty and uncommented */
            seen_chars = 0;
            in_comment = 0;
        }
        else if ((ch == ' ' || ch == '\t') && self->strip_whitespace_lines)
        {
            /* Insignificant whitespace: only relevant as comment text */
            if (in_comment && !header)
                push_comment(self, ch);
        }
        else
        {
            /* Significant character: it either opens a comment line or,
               inside a comment, belongs to the comment text */
            if (seen_chars == 0 && self->comment != 0 && ch == self->comment)
                in_comment = 1;
            else if (in_comment && !header)
                push_comment(self, ch);

            ++seen_chars;
        }
    }

    RETURN(NO_ERROR);
}
std::ostream& CPPCodeGenerator::operator ()(std::ostream &os) const { begin_comment(os); write_header(os); end_comment(os); os << std::endl <<std::endl; std::set< shared_ptr<model::Class> > class_dependencies; m_Class->visit(bind(&CPPCodeGenerator::collect_class_dependencies, this, _1, ref(class_dependencies))); os << "#ifndef J2CPP_INCLUDE_IMPLEMENTATION" << std::endl << std::endl; std::string strIFNDEF=m_Class->get_cxx_include_path(); algorithm::to_upper(strIFNDEF); algorithm::replace_all(strIFNDEF,"/","_"); os << "#ifndef J2CPP_" << strIFNDEF << "_HPP_DECL" << std::endl; os << "#define J2CPP_" << strIFNDEF << "_HPP_DECL" << std::endl; os << std::endl << std::endl; BOOST_FOREACH(shared_ptr<model::Class> dependend_class, class_dependencies) { if(dependend_class==m_Class) continue; std::vector< shared_ptr<model::Entity> > parentEntities; if(shared_ptr<model::Entity> parentEntity=dependend_class->get_parent()) { do { parentEntities.push_back(parentEntity); parentEntity=parentEntity->get_parent(); } while(parentEntity && parentEntity!=m_RootNS); } os << "namespace j2cpp { "; for(std::size_t ns=0;ns<parentEntities.size();++ns) { if(shared_ptr<model::Namespace> enclosingNamespace=shared_ptr<model::Namespace>(parentEntities[parentEntities.size()-ns-1],detail::dynamic_cast_tag())) { os << "namespace " << enclosingNamespace->get_name() << " { "; } else if(shared_ptr<model::Class> enclosingClass=shared_ptr<model::Class>(parentEntities[parentEntities.size()-ns-1],detail::dynamic_cast_tag())) { os << "namespace " << enclosingClass->get_name() << "_ { "; } } os << "class " << dependend_class->get_name() << ";"; for(std::size_t ns=0;ns<parentEntities.size();++ns) os << " }"; os << " }" << std::endl; } os << std::endl << std::endl; std::set<std::string> dependenciesIncludes; BOOST_FOREACH(shared_ptr<model::Class> dependend_class, class_dependencies) { if(dependend_class==m_Class) continue; dependenciesIncludes.insert(dependend_class->get_cxx_include_path()); } 
BOOST_FOREACH(std::string incPath, dependenciesIncludes) { os << "#include <" << incPath << ".hpp>" << std::endl; }
/*
 * Tokenize self->source, starting at self->source_pos, into per-column
 * output buffers using a character-driven state machine.
 *
 * Parameters:
 *   self     - tokenizer whose source, delimiter, quotechar, comment and
 *              strip_whitespace_* settings drive the parse. Its output
 *              buffers are released via delete_data() and reallocated here.
 *   end      - when 0, the buffers are allocated but nothing is read. The
 *              value is not otherwise referenced in this body
 *              (NOTE(review): presumably consumed by the END_LINE macro -
 *              confirm against the macro definitions).
 *   header   - when non-zero, output goes into a single column and comment
 *              text is not forwarded to push_comment()/end_comment().
 *   num_cols - number of output columns when `header` is zero.
 *
 * Result (delivered through the RETURN macro): NO_ERROR when end == 0,
 * TOO_MANY_COLS when a field starts at or beyond self->num_cols, 0 once the
 * whole input has been consumed. The END_FIELD/END_LINE macros are defined
 * elsewhere and may return other codes.
 *
 * The locals below keep their names on purpose: the field/line macros are
 * defined outside this function and may refer to them.
 */
int tokenize(tokenizer_t *self, int end, int header, int num_cols)
{
    char c; // input character
    int col = 0; // current column ignoring possibly excluded columns
    tokenizer_state old_state = START_LINE; // last state the tokenizer was in before CR mode
    int parse_newline = 0; // explicit flag to treat current char as a newline
    int i = 0;
    int whitespace = 1;

    delete_data(self); // clear old reading data
    self->num_rows = 0;
    self->comment_lines_len = INITIAL_COMMENT_LEN;

    if (header)
        self->num_cols = 1; // store header output in one column
    else
        self->num_cols = num_cols;

    // Allocate memory for structures used during tokenization
    // NOTE(review): allocation results are not checked here; no out-of-memory
    // error code is visible from this function, so that is left unchanged.
    self->output_cols = (char **) malloc(self->num_cols * sizeof(char *));
    self->col_ptrs = (char **) malloc(self->num_cols * sizeof(char *));
    self->output_len = (size_t *) malloc(self->num_cols * sizeof(size_t));

    for (i = 0; i < self->num_cols; ++i)
    {
        self->output_cols[i] = (char *) calloc(1, INITIAL_COL_SIZE * sizeof(char));
        // Make each col_ptrs pointer point to the beginning of the
        // column string
        self->col_ptrs[i] = self->output_cols[i];
        self->output_len[i] = INITIAL_COL_SIZE;
    }

    if (end == 0)
        RETURN(NO_ERROR); // don't read if end == 0

    self->state = START_LINE;

    // Loop until all of self->source has been read. The loop runs one step
    // past the last character so that end of input is seen as a newline.
    while (self->source_pos < self->source_len + 1)
    {
        if (self->source_pos == self->source_len || parse_newline)
            c = '\n';
        else
            c = self->source[self->source_pos];

        // Carriage returns are handled exactly like line feeds
        if (c == '\r')
            c = '\n';

        parse_newline = 0;

        switch (self->state)
        {
        case START_LINE:
            if (c == '\n')
                break;
            else if ((c == ' ' || c == '\t') && self->strip_whitespace_lines)
                break;
            else if (self->comment != 0 && c == self->comment)
            {
                // comment line; ignore
                self->state = COMMENT;
                break;
            }
            // initialize variables for the beginning of line parsing
            col = 0;
            BEGIN_FIELD();
            // parse in mode START_FIELD
            // fall through

        case START_FIELD:
            // strip whitespace before field begins
            if ((c == ' ' || c == '\t') && self->strip_whitespace_fields)
                break;
            else if (!self->strip_whitespace_lines && self->comment != 0
                     && c == self->comment)
            {
                // comment line, not caught earlier because of no stripping
                self->state = COMMENT;
                break;
            }
            else if (c == self->delimiter) // field ends before it begins
            {
                if (col >= self->num_cols)
                    RETURN(TOO_MANY_COLS);
                END_FIELD();
                BEGIN_FIELD();
                break;
            }
            else if (c == '\n')
            {
                if (self->strip_whitespace_lines)
                {
                    // Move on if the delimiter is whitespace, e.g.
                    // '1 2 3   '->['1','2','3']
                    if (self->delimiter == ' ' || self->delimiter == '\t')
                        ;
                    // Register an empty field if non-whitespace delimiter,
                    // e.g. '1,2, '->['1','2','']
                    else
                    {
                        if (col >= self->num_cols)
                            RETURN(TOO_MANY_COLS);
                        END_FIELD();
                    }
                }
                else
                {
                    // In this case we don't want to left-strip the field,
                    // so we backtrack to where the field began.
                    //
                    // The scan uses a local index that stops at 0: the
                    // position is saved in a size_t, and decrementing an
                    // unsigned position below zero wraps around, which
                    // would read in front of the source buffer whenever
                    // the first line holds only stripped whitespace.
                    const size_t newline_pos = self->source_pos;
                    size_t field_start = newline_pos;

                    while (field_start > 0
                           && self->source[field_start - 1] != self->delimiter
                           && self->source[field_start - 1] != '\n'
                           && self->source[field_start - 1] != '\r')
                    {
                        --field_start;
                    }

                    if (field_start == 0
                        || self->source[field_start - 1] == '\n'
                        || self->source[field_start - 1] == '\r')
                    {
                        // backtracked to line beginning (or to the start of
                        // the input): there is no field to register
                        self->source_pos = newline_pos;
                    }
                    else
                    {
                        // Stopped right after a delimiter: whatever lies
                        // between it and the newline is the field content
                        // (possibly nothing, i.e. just an empty field)
                        self->source_pos = field_start;

                        while (self->source_pos < newline_pos)
                        {
                            // append whitespace characters
                            PUSH(self->source[self->source_pos]);
                            ++self->source_pos;
                        }

                        if (col >= self->num_cols)
                            RETURN(TOO_MANY_COLS);
                        END_FIELD(); // whitespace counts as a field
                    }
                }

                END_LINE();
                self->state = START_LINE;
                break;
            }
            else if (c == self->quotechar) // start parsing quoted field
            {
                self->state = START_QUOTED_FIELD;
                break;
            }
            else
            {
                if (col >= self->num_cols)
                    RETURN(TOO_MANY_COLS);
                // Valid field character, parse again in FIELD mode
                self->state = FIELD;
            }
            // fall through

        case FIELD:
            if (self->comment != 0 && c == self->comment && whitespace && col == 0)
            {
                // No whitespace stripping, but the comment char is found
                // before any data, e.g. '  # a b c'
                self->state = COMMENT;
            }
            else if (c == self->delimiter)
            {
                // End of field, look for new field
                END_FIELD();
                BEGIN_FIELD();
            }
            else if (c == '\n')
            {
                // Line ending, stop parsing both field and line
                END_FIELD();
                END_LINE();
                self->state = START_LINE;
            }
            else
            {
                if (c != ' ' && c != '\t')
                    whitespace = 0; // field is not all whitespace
                PUSH(c);
            }
            break;

        case START_QUOTED_FIELD:
            if ((c == ' ' || c == '\t') && self->strip_whitespace_fields)
            {
                // ignore initial whitespace
                break;
            }
            else if (c == self->quotechar)
            {
                // Lookahead check for double quote inside quoted field,
                // e.g. """cd" => "cd
                if (self->source_pos < self->source_len - 1)
                {
                    if (self->source[self->source_pos + 1] == self->quotechar)
                    {
                        self->state = QUOTED_FIELD_DOUBLE_QUOTE;
                        PUSH(c);
                        break;
                    }
                }
                // Parse rest of field normally, e.g. ""c
                self->state = FIELD;
            }
            else
            {
                // Valid field character, parse again in QUOTED_FIELD mode
                self->state = QUOTED_FIELD;
            }
            // fall through

        case QUOTED_FIELD_NEWLINE:
            if (self->state == QUOTED_FIELD)
                ; // fall through
            // Ignore initial whitespace if strip_whitespace_lines and
            // newlines regardless
            else if (((c == ' ' || c == '\t') && self->strip_whitespace_lines)
                     || c == '\n')
                break;
            else if (c == self->quotechar)
            {
                self->state = FIELD;
                break;
            }
            else
            {
                // Once data begins, parse it as a normal quoted field
                self->state = QUOTED_FIELD;
            }
            // fall through

        case QUOTED_FIELD:
            if (c == self->quotechar)
            {
                // Lookahead check for double quote inside quoted field,
                // e.g. "ab""cd" => ab"cd
                if (self->source_pos < self->source_len - 1)
                {
                    if (self->source[self->source_pos + 1] == self->quotechar)
                    {
                        self->state = QUOTED_FIELD_DOUBLE_QUOTE;
                        PUSH(c);
                        break;
                    }
                }
                // Parse rest of field normally, e.g. "ab"c
                self->state = FIELD;
            }
            else if (c == '\n')
                self->state = QUOTED_FIELD_NEWLINE;
            else
            {
                PUSH(c);
            }
            break;

        case QUOTED_FIELD_DOUBLE_QUOTE:
            // Ignore the second double quote from "ab""cd" and parse rest of
            // field normally as quoted field.
            self->state = QUOTED_FIELD;
            break;

        case COMMENT:
            if (c == '\n')
            {
                self->state = START_LINE;
                if (!header)
                    end_comment(self);
            }
            else if (!header)
                push_comment(self, c);
            break; // keep looping until we find a newline
        }

        ++self->source_pos;
    }

    RETURN(0);
}