Ejemplos de unicode_string_substr en C++ (Cpp)

Ejemplo n.º 1

0

Mostrar archivo

Archivo: logic.cpp Proyecto: bibledit/bibledit-windows

// Strips the verse text from a line of the search results and returns the remainder.
// Example $input:
//   1 Peter 4:17 For the time has come for judgment to begin with the household of God.
// Result for that $input: "1 Peter 4:17".
// The cut is made right after the last numeric character found in $input.
// If $input has no numeric character at all, the cut is made after position 0.
string Consistency_Logic::omit_verse_text (string input)
{
  // Position of the most recently seen numeric character; stays 0 if none is seen.
  size_t final_numeral = 0;
  const size_t character_count = unicode_string_length (input);
  for (size_t position = 0; position < character_count; position++) {
    string candidate = unicode_string_substr (input, position, 1);
    if (!filter_string_is_numeric (candidate)) continue;
    final_numeral = position;
  }
  // Keep everything up to and including that character.
  return unicode_string_substr (input, 0, final_numeral + 1);
}

Ejemplo n.º 2

0

Mostrar archivo

Archivo: sentences.cpp Proyecto: alerque/bibledit

// Replaces the stored names with the ones listed in $names_in.
// $names_in: Names separated by spaces and/or newlines.
void Checks_Sentences::enterNames (string names_in)
{
  names.clear ();
  // Newlines act as separators too: turn them into spaces before splitting.
  string flattened = filter_string_str_replace ("\n", " ", names_in);
  vector <string> candidates = filter_string_explode (flattened, ' ');
  for (size_t index = 0; index < candidates.size (); index++) {
    string candidate = candidates [index];
    // Consecutive separators yield empty fragments: skip those.
    if (candidate.empty ()) continue;
    // Store a shortened name: the part unicode_string_substr returns for position 0, length 11.
    names.push_back (unicode_string_substr (candidate, 0, 11));
  }
}

Ejemplo n.º 3

0

Mostrar archivo

Archivo: sentences.cpp Proyecto: alerque/bibledit

// Stores a checking result for the current verse number.
// $text: The message to store. Depending on $modifier, context is appended to it.
// $modifier: One of displayGraphemeOnly, displayContext or skipNames.
//   displayGraphemeOnly: Append the current grapheme.
//   displayContext: Append the preceding fragment, the grapheme and the following fragment.
//   skipNames: Like displayContext, but store nothing if a known name starts at the grapheme.
void Checks_Sentences::addResult (string text, int modifier)
{
  // Text fragment preceding the current position: start up to 25 positions back.
  // NOTE(review): the requested length becomes negative when currentPosition is 0;
  // how unicode_string_substr deals with that is not visible here.
  int begin = currentPosition - 25;
  if (begin < 0) begin = 0;
  string leading = unicode_string_substr (fullText, begin, currentPosition - begin - 1);
  // At most five times: drop the first word while more than 10 bytes remain from its space onwards.
  for (int pass = 5; pass > 0; pass--) {
    const size_t space = leading.find (" ");
    if (space == string::npos) continue;
    if ((leading.length () - space) > 10) leading.erase (0, space + 1);
  }

  // Text fragment following the current position: cut back till no more than 10 bytes remain.
  // Whole trailing words go first. Without any space left, single bytes are removed.
  // NOTE(review): this trimming works on bytes rather than on UTF-8 characters.
  string trailing = unicode_string_substr (fullText, currentPosition, 25);
  while (trailing.length () > 10) {
    const size_t space = trailing.rfind (" ");
    if (space != string::npos) trailing.erase (space);
    else trailing.erase (trailing.length () - 1, 1);
  }

  // A result is skipped if grapheme plus following fragment starts with one of the names.
  if (modifier == skipNames) {
    const string candidate = grapheme + trailing;
    for (auto name : names) {
      if (candidate.find (name) == 0) return;
    }
  }

  // Extend the message according to the modifier.
  if (modifier == displayGraphemeOnly) {
    text += ": " + grapheme;
  }
  if ((modifier == skipNames) || (modifier == displayContext)) {
    text += ": " + leading + grapheme + trailing;
  }

  // Keep the result together with the verse number it belongs to.
  checkingResults.push_back (make_pair (verseNumber, text));
}

Ejemplo n.º 4

0

Mostrar archivo

Archivo: export.cpp Proyecto: alerque/bibledit

// Repairs missing note callers in $usfm and returns the repaired USFM.
// The note caller is the one character that immediately follows the note opener.
// E.g. the "+" in: \f + ...\f*.
// The user may accidentally omit or erase it. Where it is missing, the default "+" is inserted.
// Before that, every note opener followed by a space is replaced by the note opener alone.
string Editor_Export::cleanUSFM (string usfm)
{
  for (string noteOpener : noteOpeners) {
    string marker = usfm_get_opening_usfm (noteOpener);
    usfm = filter_string_str_replace (marker + " ", marker, usfm);
    const size_t marker_length = unicode_string_length (marker);

    // Visit the occurrences of the note opener one after the other.
    // NOTE(review): the search resumes a fixed 5 positions past the current match;
    // the reason for that number is not visible here.
    for (size_t found_at = unicode_string_strpos (usfm, marker);
         found_at != string::npos;
         found_at = unicode_string_strpos (usfm, marker, found_at + 5)) {

      // The caller sits right after the opener, and a space should come right after the caller.
      const size_t caller_at = found_at + marker_length;
      const string caller = unicode_string_substr (usfm, caller_at, 1);
      const string after_caller = unicode_string_substr (usfm, caller_at + 1, 1);

      // All is well if the caller is a non-space that is followed by a space.
      const bool caller_ok = (caller != " ") && (after_caller == " ");
      if (caller_ok) continue;

      // Insert the default caller at the place where the caller belongs.
      Database_Logs::log ("Fixing note caller in " + usfm);
      usfm = unicode_string_substr (usfm, 0, caller_at) + "+" + unicode_string_substr (usfm, caller_at);
    }
  }

  return usfm;
}

Ejemplo n.º 5

0

Mostrar archivo

Archivo: verses.cpp Proyecto: bibledit/bibledit-windows

// Records a checking result for every verse that does not end with punctuation.
// $bible, $book, $chapter: Passed on to the recorded result.
// $verses: Verse numbers mapped to their text. Verse 0 and empty texts are not checked.
// $center_marks, $end_marks: Space-separated punctuation marks accepted at the end of a verse.
void Checks_Verses::missingPunctuationAtEnd (string bible, int book, int chapter, map <int, string> verses,
                                             string center_marks, string end_marks)
{
  vector <string> accepted_center = filter_string_explode (center_marks, ' ');
  vector <string> accepted_end = filter_string_explode (end_marks, ' ');
  Database_Check database_check;
  for (auto entry : verses) {
    const int verse_number = entry.first;
    string verse_text = entry.second;
    if ((verse_number == 0) || verse_text.empty ()) continue;
    // Take the final character of the verse text.
    const size_t character_count = unicode_string_length (verse_text);
    string final_character = unicode_string_substr (verse_text, character_count - 1, 1);
    // Any of the center marks or end marks will do.
    const bool punctuated = in_array (final_character, accepted_center) || in_array (final_character, accepted_end);
    if (!punctuated) {
      database_check.recordOutput (bible, book, chapter, verse_number, "No punctuation at end of verse: " + final_character);
    }
  }
}

Ejemplo n.º 6

0

Mostrar archivo

Archivo: sentences.cpp Proyecto: alerque/bibledit

// Runs the sentence checks on the given verses.
// $texts: Verse numbers mapped to their text.
// The texts are split into graphemes, which are appended to fullText,
// and then each grapheme is analyzed and checked in turn.
// NOTE(review): fullText is only appended to here; whether it gets cleared elsewhere is not visible.
void Checks_Sentences::check (map <int, string> texts)
{
  // Parallel lists: entry n of the one holds the verse number of grapheme n in the other.
  vector <string> all_graphemes;
  vector <int> all_verses;

  bool first_text = true;
  for (auto entry : texts) {
    const int verse = entry.first;
    string verse_text = entry.second;

    // A space separates a text from the preceding one,
    // because this is what is supposed to happen in USFM.
    if (!first_text) {
      all_verses.push_back (verse);
      all_graphemes.push_back (" ");
      fullText += " ";
    }
    first_text = false;

    // Split the UTF-8 text into graphemes and store them along with the verse number.
    const int grapheme_total = unicode_string_length (verse_text);
    for (int position = 0; position < grapheme_total; position++) {
      grapheme = unicode_string_substr (verse_text, position, 1);
      // Leave out the graphemes that are to be disregarded.
      const bool disregarded = find (disregards.begin(), disregards.end (), grapheme) != disregards.end();
      if (disregarded) continue;
      all_verses.push_back (verse);
      all_graphemes.push_back (grapheme);
      fullText += grapheme;
    }
  }

  // Handle the graphemes one by one.
  const int stored_total = all_graphemes.size ();
  for (int index = 0; index < stored_total; index++) {
    // Put the verse number and the grapheme in the object for the calls below.
    verseNumber = all_verses [index];
    grapheme = all_graphemes [index];
    // Analysis goes first, followed by the checks.
    analyzeGrapheme ();
    checkUnknownCharacter ();
    checkGrapheme ();
  }
}

Ejemplo n.º 7

0

Mostrar archivo

Archivo: usfm.cpp Proyecto: alerque/bibledit

// This function gets a new position to insert a note.
// $usfm: The USFM code to work with.
// $position: Current position.
// $direction: Direction where to go to find the new position:
//   -1: Go back to the previous slot.
//    0: Attempt current position, else take next slot.
//    1: Go forward to the next slot.
// The positions take the string as UTF8.
// Returns the new position. If no slot matches, that is the length of $usfm.
// If the character just before the new position is one of . , ; : ? !
// then the position is moved to before that character.
size_t usfm_get_new_note_position (string usfm, size_t position, int direction)
{
  // Collect the slots: the offsets right after each space-separated word.
  vector <string> words = filter_string_explode (usfm, ' ');
  vector <size_t> slots;
  size_t offset = 0;
  for (string word : words) {

    // Move to the end of this word.
    offset += unicode_string_length (word);

    // Don't create a slot for placing a note right after an opening marker.
    bool opening_marker = usfm_is_usfm_marker (word);
    if (opening_marker) {
      opening_marker = usfm_is_opening_marker (word);
    }
    if (!opening_marker) slots.push_back (offset);

    // Skip the space that follows the word.
    offset++;
  }

  bool found = false;

  if (direction > 0) {
    // Take the first slot beyond the current position.
    for (size_t slot : slots) {
      if (slot > position) {
        position = slot;
        found = true;
        break;
      }
    }
  } else if (direction < 0) {
    // Take the last slot before the current position: walk the slots backwards.
    for (auto slot = slots.rbegin (); slot != slots.rend (); ++slot) {
      if (*slot < position) {
        position = *slot;
        found = true;
        break;
      }
    }
  } else {
    // Take the first slot at or beyond the current position.
    for (size_t slot : slots) {
      if (slot >= position) {
        position = slot;
        found = true;
        break;
      }
    }
  }

  // Without a matching slot, the note goes to the end of the USFM.
  if (!found) {
    position = unicode_string_length (usfm);
  }

  // Move a note to before punctuation.
  // At position 0 there is no preceding character.
  // Looking for one would wrap the unsigned position around, so skip that case.
  if (position > 0) {
    set <string> punctuation = {".", ",", ";", ":", "?", "!"};
    string character = unicode_string_substr (usfm, position - 1, 1);
    if (punctuation.find (character) != punctuation.end()) position--;
  }

  return position;
}

Ejemplo n.º 8

0

Mostrar archivo

Archivo: sentences.cpp Proyecto: alerque/bibledit

// Checks that paragraphs start with a capital and end with an end mark.
// $texts: Verse numbers mapped to their text.
// $paragraphs: Offsets, counted in graphemes over all texts together, where the paragraphs start.
// Failures are added to checkingResults, along with the verse number at the offset involved.
// Offsets are handled as unsigned values, so that an offset that went below zero
// becomes a huge number and fails the range checks, leaving verse 0 and an empty grapheme.
void Checks_Sentences::paragraphs (map <int, string> texts, vector <int> paragraphs)
{
  // Parallel lists: entry n of the one holds the verse number of grapheme n in the other.
  vector <string> all_graphemes;
  vector <int> all_verses;

  // Split the UTF-8 texts into graphemes.
  for (auto entry : texts) {
    const int verse_number = entry.first;
    string verse_text = entry.second;
    const int grapheme_total = unicode_string_length (verse_text);
    for (int position = 0; position < grapheme_total; position++) {
      string piece = unicode_string_substr (verse_text, position, 1);
      all_verses.push_back (verse_number);
      all_graphemes.push_back (piece);
    }
  }

  // Correct the positions where the paragraphs start, beginning at the second paragraph:
  // if the verse at the offset differs from the verse two positions earlier,
  // this offset and all the ones after it are moved back by one.
  for (unsigned int current = 1; current < paragraphs.size(); current++) {
    const unsigned int here = paragraphs [current];
    const unsigned int two_back = here - 2;
    int verse_here = 0;
    if (here < all_verses.size()) verse_here = all_verses [here];
    int verse_two_back = 0;
    if (two_back < all_verses.size ()) verse_two_back = all_verses [two_back];
    if (verse_here == verse_two_back) continue;
    for (unsigned int later = current; later < paragraphs.size(); later++) {
      paragraphs [later]--;
    }
  }

  const int paragraph_total = paragraphs.size();

  // First pass: every paragraph should start with a capital.
  for (int index = 0; index < paragraph_total; index++) {
    const unsigned int first = paragraphs [index];
    int verse_number = 0;
    string first_grapheme;
    if (first < all_verses.size()) verse_number = all_verses [first];
    if (first < all_graphemes.size ()) first_grapheme = all_graphemes [first];
    isCapital = find (capitals.begin(), capitals.end(), first_grapheme) != capitals.end ();
    if (isCapital) continue;
    checkingResults.push_back (make_pair (verse_number, "Paragraph does not start with a capital: " + first_grapheme));
  }

  // Second pass: every paragraph should end with an end mark.
  for (int index = 0; index < paragraph_total; index++) {
    // A paragraph ends right before the next one starts. The last one ends where the graphemes end.
    unsigned int last = all_graphemes.size();
    if ((index + 1) < paragraph_total) last = paragraphs [index + 1];
    last--;
    int verse_number = 0;
    string last_grapheme;
    string grapheme_before_last;
    if (last < all_verses.size()) verse_number = all_verses [last];
    if (last < all_graphemes.size ()) {
      last_grapheme = all_graphemes [last];
      if (last) grapheme_before_last = all_graphemes [last - 1];
    }
    // The end mark may be the last grapheme or the one right before it.
    isEndMark = in_array (last_grapheme, this->end_marks) || in_array (grapheme_before_last, this->end_marks);
    if (isEndMark) continue;
    checkingResults.push_back (make_pair (verse_number, "Paragraph does not end with an end marker: " + last_grapheme));
  }

}