Beispiel #1
0
// Sorts the $abbreviations, so that the longer abbreviations come first,
// and the shorter ones follow.
// Each element of $abbreviations pairs an integer (called "book" here) with its abbreviation.
// The length of an abbreviation is measured with unicode_string_length.
// An abbreviation text that occurs more than once in the input is handled once,
// so every input pair appears exactly once in the output.
// It returns the sorted vector.
vector <pair <int, string> > filter_abbreviations_sort (vector <pair <int, string> > abbreviations)
{
  // Build two parallel vectors: the abbreviation texts and their lengths.
  vector <string> abbrevs;
  vector <unsigned int> lengths;
  for (const auto & element : abbreviations) {
    abbrevs.push_back (element.second);
    lengths.push_back (unicode_string_length (element.second));
  }
  // Presumably quick_sort orders both vectors by ascending length (TODO confirm),
  // so reversing afterwards puts the longest abbreviations first.
  quick_sort (lengths, abbrevs, 0, lengths.size ());
  reverse (abbrevs.begin (), abbrevs.end ());

  vector <pair <int, string> > output;
  for (size_t i = 0; i < abbrevs.size (); i++) {
    const string & abbrev = abbrevs [i];
    // An abbreviation text seen earlier in the sorted list has already
    // contributed all of its pairs: skip it, or the pairs get duplicated.
    const auto current = abbrevs.begin () + i;
    if (find (abbrevs.begin (), current, abbrev) != current) continue;
    // Emit every input pair that carries this abbreviation.
    for (const auto & element : abbreviations) {
      if (element.second == abbrev) {
        output.push_back (make_pair (element.first, abbrev));
      }
    }
  }
  return output;
}
Beispiel #2
0
// Returns the offset within the $usfm code where $verse number starts.
// The offset is the summed unicode_string_length of all lines that precede
// the first line containing $verse, plus one per line for the new line.
// Verse 0 always gives offset 0.
// If $verse does not occur, the length of the whole $usfm is returned.
int usfm_versenumber_to_offset (string usfm, int verse)
{
  // Verse 0 is at the very start.
  if (verse == 0) return 0;

  int offset = 0;
  const vector <string> usfm_lines = filter_string_explode (usfm, '\n');
  for (const string & usfm_line : usfm_lines) {
    // Does this line hold the requested verse?
    bool verse_on_line = false;
    const vector <int> numbers = usfm_get_verse_numbers (usfm_line);
    for (const int number : numbers) {
      if (number == verse) verse_on_line = true;
    }
    if (verse_on_line) return offset;
    // Skip this line and the new line that follows it.
    offset += unicode_string_length (usfm_line);
    offset += 1;
  }

  // Verse not found: point to the end of the text.
  return unicode_string_length (usfm);
}
Beispiel #3
0
// Returns the verse numbers in the string of $usfm code at offset $offset.
// Offset is calculated with unicode_string_length to support UTF-8.
// The $usfm is split on new lines. The first line whose end is at or beyond $offset
// is passed, by its line number, to usfm_linenumber_to_versenumber.
// If $offset lies beyond the last line, the result is a vector holding only 0.
vector <int> usfm_offset_to_versenumber (string usfm, unsigned int offset)
{
  const vector <string> usfm_lines = filter_string_explode (usfm, '\n');
  unsigned int line_end = 0;
  unsigned int line_number = 0;
  for (const string & usfm_line : usfm_lines) {
    // Advance to the end of the current line.
    const unsigned int line_length = unicode_string_length (usfm_line);
    line_end += line_length;
    if (line_end >= offset) {
      return usfm_linenumber_to_versenumber (usfm, line_number);
    }
    // Step over the new line to the start of the next line.
    line_end += 1;
    line_number++;
  }
  return {0};
}
Beispiel #4
0
// Records a check result for every verse whose last character is
// neither one of the $center_marks nor one of the $end_marks.
// $verses: Maps verse numbers to verse texts. Verse 0 and empty texts are skipped.
// $center_marks, $end_marks: Space-separated lists of punctuation marks.
// $bible, $book, $chapter: Passed on to Database_Check::recordOutput with the verse number.
void Checks_Verses::missingPunctuationAtEnd (string bible, int book, int chapter, map <int, string> verses,
                                             string center_marks, string end_marks)
{
  vector <string> centermarks = filter_string_explode (center_marks, ' ');
  vector <string> endmarks = filter_string_explode (end_marks, ' ');
  Database_Check database_check;
  for (const auto & element : verses) {
    int verse = element.first;
    const string & text = element.second;
    if ((verse == 0) || text.empty ()) continue;
    // Take the last character of the text, counted in UTF-8 characters.
    size_t last_position = unicode_string_length (text) - 1;
    string final_character = unicode_string_substr (text, last_position, 1);
    // Any known punctuation mark at the end is fine.
    bool punctuated = in_array (final_character, centermarks) || in_array (final_character, endmarks);
    if (!punctuated) {
      database_check.recordOutput (bible, book, chapter, verse, "No punctuation at end of verse: " + final_character);
    }
  }
}
Beispiel #5
0
// This function omits the verse text from a line of text from the search results.
// It keeps the $input up to and including its last numeral, and drops the rest.
// If the $input has no numeral at all, it is cut after the first character.
string Consistency_Logic::omit_verse_text (string input)
{
  // Imagine the following $input:
  // 1 Peter 4:17 For the time has come for judgment to begin with the household of God. If it begins first with us, what will happen to those who don’t obey the Good News of God?
  // The function extracts "1 Peter 4:17" from it, since the "7" is the last numeral.

  // Scan all characters and remember the position of the most recent numeral.
  const size_t character_count = unicode_string_length (input);
  size_t numeral_position = 0;
  size_t index = 0;
  while (index < character_count) {
    string character = unicode_string_substr (input, index, 1);
    if (filter_string_is_numeric (character)) numeral_position = index;
    index++;
  }

  // Keep everything up to and including that numeral.
  return unicode_string_substr (input, 0, numeral_position + 1);
}
Beispiel #6
0
void Checks_Sentences::check (map <int, string> texts)
{
  vector <int> verse_numbers;
  vector <string> graphemes;
  int iterations = 0;
  for (auto element : texts) {
    int verse = element.first;
    string text = element.second;
    // For the second and subsequent verse_numbers, add a space to the text,
    // because this is what is supposed to happen in USFM.
    if (iterations > 0) {
      verse_numbers.push_back (verse);
      graphemes.push_back (" ");
      fullText += " ";
    }
    // Split the UTF-8 text into graphemes and add them to the arrays of verse_numbers/graphemes.
    int count = unicode_string_length (text);
    for (int i = 0; i < count; i++) {
      grapheme = unicode_string_substr (text, i, 1);
      // Skip graphemes to be disregarded.
      if (find (disregards.begin(), disregards.end (), grapheme) != disregards.end()) continue;
      // Store verse numbers and graphemes.
      verse_numbers.push_back (verse);
      graphemes.push_back (grapheme);
      fullText += grapheme;
    }
    // Next iteration.
    iterations++;
  }
  
  // Go through the graphemes.
  int graphemeCount = graphemes.size ();
  for (int i = 0; i < graphemeCount; i++) {
    // Store current verse number in the object.
    verseNumber = verse_numbers [i];
    // Get the current grapheme.
    grapheme = graphemes [i];
    // Analyze the grapheme.
    analyzeGrapheme ();
    // Run the checks.
    checkUnknownCharacter ();
    checkGrapheme ();
  }
}
Beispiel #7
0
// This function extracts notes, e.g. cross references.
// $usfm: Where to extract from.
// $markers: Array of possible markers surrounding the notes.
// It returns an array of array ($offset, $note).
vector <UsfmNote> usfm_extract_notes (string usfm, const vector <string> & markers)
{
  set <string> openers;
  set <string> closers;
  for (string marker : markers) {
    openers.insert (usfm_get_opening_usfm (marker));
    closers.insert (usfm_get_closing_usfm (marker));
  }

  vector <string> markers_and_text = usfm_get_markers_and_text (usfm);

  vector <UsfmNote> notes;
  
  bool within_note = false;
  size_t running_offset = 0;
  string running_note = "";

  for (string item : markers_and_text) {
    
    if (openers.find (item) != openers.end ()) within_note = true;

    // Caclulate the offset in the main text. 
    // That means not to consider the length of the notes.
    if (!within_note) {
      running_offset += unicode_string_length (item);
    }
    
    if (within_note) running_note += item;

    if (closers.find (item) != closers.end()) {
      notes.push_back (UsfmNote (running_offset, running_note));
      running_note = "";
      within_note = false;
    }
  }
  
  return notes;
}
Beispiel #8
0
string Editor_Export::cleanUSFM (string usfm)
{
  // The user may accidentally omit or erase the note caller.
  // The note caller is one character that immediately follows the note opener.
  // E.g.: \f + ...\f*.
  // Check for missing note caller, and if it's not there, add the default "+".
  // Also replace a double space after a note opener.
  for (string noteOpener : noteOpeners) {
    string opener = usfm_get_opening_usfm (noteOpener);
    usfm = filter_string_str_replace (opener + " ", opener, usfm);
    size_t pos = unicode_string_strpos (usfm, opener);
    while (pos != string::npos) {
      bool isClean = true;

      // Check that the character that follows the note opener is a non-space.
      size_t pos2 = pos + unicode_string_length (opener);
      string character = unicode_string_substr (usfm, pos2, 1);
      if (character == " ") isClean = false;
      
      // Check that the following character is not a space.
      pos2++;
      character = unicode_string_substr (usfm, pos2, 1);
      if (character != " ") isClean = false;
      
      // Fix the note caller if necessary.
      if (!isClean) {
        Database_Logs::log ("Fixing note caller in " + usfm);
        pos2--;
        usfm = unicode_string_substr (usfm, 0, pos2) + "+" + unicode_string_substr (usfm, pos2);
      }
      
      // Next iteration.
      pos = unicode_string_strpos (usfm, opener, pos + 5);
    }
  }
  
  return usfm;
}
Beispiel #9
0
// This function gets a new position to insert a note.
// $usfm: The USFM code to work with.
// $position: Current position.
// $direction: Direction where to go to find the new position:
//   -1: Go back to the previous slot.
//    0: Attempt current position, else take next slot.
//    1: Go forward to the next slot.
// The positions take the string as UTF8.
// A slot is the end of a space-separated word, except when that word is an opening marker.
// If no slot qualifies, the position becomes the length of the $usfm.
// If the character just before the resulting position is one of . , ; : ? !
// then the position moves one back, so the note comes before the punctuation.
size_t usfm_get_new_note_position (string usfm, size_t position, int direction)
{
  const vector <string> words = filter_string_explode (usfm, ' ');

  // Collect the slots: the offsets right after each word.
  // The offsets only increase, since each word adds its length plus one for the space.
  vector <size_t> slots;
  size_t length = 0;
  for (const string & word : words) {

    // Add length of item.
    length += unicode_string_length (word);

    // Don't create a slot for placing a note right after an opening marker.
    const bool opening_marker = usfm_is_usfm_marker (word) && usfm_is_opening_marker (word);
    if (!opening_marker) slots.push_back (length);

    // Add length of space.
    length++;
  }

  bool found = false;

  if (direction > 0) {
    // Take next position: the first slot beyond the current position.
    for (const size_t slot : slots) {
      if (slot > position) {
        position = slot;
        found = true;
        break;
      }
    }
  } else if (direction < 0) {
    // Take previous position: the last slot before the current position.
    for (auto slot = slots.rbegin (); slot != slots.rend (); ++slot) {
      if (*slot < position) {
        position = *slot;
        found = true;
        break;
      }
    }
  } else {
    // Take optimal position: the first slot at or beyond the current position.
    for (const size_t slot : slots) {
      if (slot >= position) {
        position = slot;
        found = true;
        break;
      }
    }
  }

  if (!found) {
    position = unicode_string_length (usfm);
  }

  // Move a note to before punctuation.
  // At position 0 there is no preceding character to look at,
  // and position - 1 would wrap around, so leave the position as it is.
  if (position > 0) {
    const set <string> punctuation = {".", ",", ";", ":", "?", "!"};
    const string character = unicode_string_substr (usfm, position - 1, 1);
    if (punctuation.find (character) != punctuation.end()) position--;
  }

  return position;
}
Beispiel #10
0
// Checks that the paragraphs start with a capital and end with an end mark.
// $texts: Maps verse numbers to verse texts.
// $paragraphs: The offsets, counted in graphemes over all texts together, where the paragraphs start.
// Findings are appended to the checking results as pairs of (verse, message).
void Checks_Sentences::paragraphs (map <int, string> texts, vector <int> paragraphs)
{
  // Put the UTF-8 text into two parallel vectors:
  // one grapheme per entry, and the verse each grapheme belongs to.
  vector <int> verses;
  vector <string> graphemes;
  for (const auto & element : texts) {
    const string & text = element.second;
    const int count = unicode_string_length (text);
    for (int position = 0; position < count; position++) {
      verses.push_back (element.first);
      graphemes.push_back (unicode_string_substr (text, position, 1));
    }
  }

  // Bounds-checked lookups: an offset outside the text gives verse 0 or an empty grapheme.
  // The offsets are unsigned, so an offset that went below zero is huge and thus outside the text.
  auto verse_at = [&verses] (unsigned int position) -> int {
    if (position < verses.size ()) return verses [position];
    return 0;
  };
  auto grapheme_at = [&graphemes] (unsigned int position) -> string {
    if (position < graphemes.size ()) return graphemes [position];
    return string ();
  };

  // Correct the positions where the paragraphs start.
  // When the verse at a paragraph start differs from the verse two graphemes earlier,
  // this paragraph and all later ones shift one position back.
  for (unsigned int i = 1; i < paragraphs.size(); i++) {
    const unsigned int offset = paragraphs [i];
    if (verse_at (offset) != verse_at (offset - 2)) {
      for (unsigned int later = i; later < paragraphs.size(); later++) {
        paragraphs [later]--;
      }
    }
  }

  const int paragraphCount = paragraphs.size();

  // Go through the paragraphs to see whether they start with capitals.
  for (int i = 0; i < paragraphCount; i++) {
    const unsigned int offset = paragraphs [i];
    int verse = verse_at (offset);
    string first_grapheme = grapheme_at (offset);
    isCapital = find (capitals.begin(), capitals.end(), first_grapheme) != capitals.end ();
    if (!isCapital) {
      checkingResults.push_back (make_pair (verse, "Paragraph does not start with a capital: " + first_grapheme));
    }
  }

  // Go through the paragraphs to see whether they end with proper punctuation.
  for (int i = 0; i < paragraphCount; i++) {
    // A paragraph ends just before the next one starts; the last one ends where the text ends.
    unsigned int offset = 0;
    if (i < (paragraphCount - 1)) {
      offset = paragraphs [i + 1];
    } else {
      offset = graphemes.size();
    }
    offset--;
    int verse = verse_at (offset);
    string last_grapheme = grapheme_at (offset);
    // The end mark may also be the grapheme before the last one.
    string previous_grapheme;
    if ((offset > 0) && (offset < graphemes.size ())) previous_grapheme = graphemes [offset - 1];
    isEndMark = in_array (last_grapheme, this->end_marks) || in_array (previous_grapheme, this->end_marks);
    if (!isEndMark) {
      checkingResults.push_back (make_pair (verse, "Paragraph does not end with an end marker: " + last_grapheme));
    }
  }

}