Esempio n. 1
0
size_t lastHyphenLocation(StringView string, size_t beforeIndex, const AtomicString& localeIdentifier)
{
    // libhyphen accepts strings in UTF-8 format, but WebCore can only provide StringView
    // which stores either UTF-16 or Latin1 data. This is unfortunate for performance
    // reasons and we should consider switching to a more flexible hyphenation library
    // if it is available.
    CString utf8StringCopy = string.toStringWithoutCopying().utf8();

    // WebCore often passes strings like " wordtohyphenate" to the platform layer. Since
    // libhyphen isn't advanced enough to deal with leading spaces (presumably CoreFoundation
    // can), we should find the appropriate indexes into the string to skip them.
    int32_t leadingSpaceBytes;
    int32_t leadingSpaceCharacters;
    countLeadingSpaces(utf8StringCopy, leadingSpaceBytes, leadingSpaceCharacters);

    // The libhyphen documentation specifies that this array should be 5 bytes longer than
    // the byte length of the input string.
    Vector<char> hyphenArray(utf8StringCopy.length() - leadingSpaceBytes + 5);
    char* hyphenArrayData = hyphenArray.data();

    String lowercaseLocaleIdentifier = AtomicString(localeIdentifier.string().convertToASCIILowercase());
    ASSERT(availableLocales().contains(lowercaseLocaleIdentifier));
    for (const auto& dictionaryPath : availableLocales().get(lowercaseLocaleIdentifier)) {
        RefPtr<HyphenationDictionary> dictionary = TinyLRUCachePolicy<AtomicString, RefPtr<HyphenationDictionary>>::cache().get(AtomicString(dictionaryPath));

        char** replacements = nullptr;
        int* positions = nullptr;
        int* removedCharacterCounts = nullptr;
        hnj_hyphen_hyphenate2(dictionary->libhyphenDictionary(),
            utf8StringCopy.data() + leadingSpaceBytes,
            utf8StringCopy.length() - leadingSpaceBytes,
            hyphenArrayData,
            nullptr, /* output parameter for hyphenated word */
            &replacements,
            &positions,
            &removedCharacterCounts);

        if (replacements) {
            for (unsigned i = 0; i < utf8StringCopy.length() - leadingSpaceBytes - 1; i++)
                free(replacements[i]);
            free(replacements);
        }

        free(positions);
        free(removedCharacterCounts);

        for (int i = beforeIndex - leadingSpaceCharacters - 2; i >= 0; i--) {
            // libhyphen will put an odd number in hyphenArrayData at all
            // hyphenation points. A number & 1 will be true for odd numbers.
            if (hyphenArrayData[i] & 1)
                return i + 1 + leadingSpaceCharacters;
        }
    }

    return 0;
}
Esempio n. 2
0
void Hyphenator::slotHyphenateWord(PageItem* it, const QString& text, int firstC)
{
	if ((!m_usable))//FIXME:av || (!ScMW->Sprachen.contains(it->Language)))
		return;
	const char *word;
	char *buffer;
	const int BORDER = 2;
	QByteArray te;

	//uint maxC = it->itemText.length() - 1;
	QString found = text;
	if (found.contains(SpecialChars::SHYPHEN))
		return;
	// else if (findException(found, &buffer) it->itemText.hyphenateWord(firstC, found.length(), buffer);
	else if (signed(found.length()) >= MinWordLen)
	{
		NewDict(it->itemText.charStyle(firstC).language());
  		te = m_codec->fromUnicode( found );
		word = te.data();
		int wordlen = strlen(word);
		buffer = static_cast<char*>(malloc(wordlen+BORDER+3));
		if (buffer == NULL)
			return;
		char ** rep = NULL;
		int * pos = NULL;
		int * cut = NULL;
		if (!hnj_hyphen_hyphenate2(m_hdict, word, wordlen, buffer, NULL, &rep, &pos, &cut))
		{
			//uint i = 0;
		  	buffer[wordlen] = '\0';
			it->itemText.hyphenateWord(firstC, found.length(), buffer); 
		}
		free(buffer);
		if (rep)
		{
			for (int i = 0; i < wordlen - 1; ++i)
				if (rep[i])
					free(rep[i]);
			free(rep);
		}
		if (pos) free(pos);
		if (cut) free(cut);
		buffer = NULL;
		rep = NULL;
		pos = NULL;
		cut = NULL;
	}
}
Esempio n. 3
0
void Hyphenator::slotHyphenateWord(PageItem* it, const QString& text, int firstC)
{
	if (text.contains(SpecialChars::SHYPHEN))
		return;

	const CharStyle& style = it->itemText.charStyle(firstC);
	if (text.length() < style.hyphenWordMin())
		return;

	bool ok = loadDict(style.language());
	if (!ok)
		return;

	QByteArray te = m_codec->fromUnicode(text);
	char *buffer = static_cast<char*>(malloc(te.length() + 5));
	if (buffer == nullptr)
		return;
	char **rep = nullptr;
	int *pos = nullptr;
	int *cut = nullptr;
	// TODO: support non-standard hyphenation, see hnj_hyphen_hyphenate2 docs
	if (!hnj_hyphen_hyphenate2(m_hdict, te.data(), te.length(), buffer, nullptr, &rep, &pos, &cut))
	{
		buffer[te.length()] = '\0';
		it->itemText.hyphenateWord(firstC, text.length(), buffer);
	}
	free(buffer);
	if (rep)
	{
		for (int i = 0; i < te.length() - 1; ++i)
			free(rep[i]);
	}
	free(rep);
	free(pos);
	free(cut);
}
Esempio n. 4
0
void Hyphenator::slotHyphenate(PageItem* it)
{
	if (!(it->asTextFrame()) || (it->itemText.length() == 0))
		return;
	m_doc->DoDrawing = false;

	QString text = "";

	int startC = 0;
	if (it->itemText.selectionLength() > 0)
	{
		startC = it->itemText.startOfSelection();
		text = it->itemText.text(startC, it->itemText.selectionLength());
	}
	else {
		text = it->itemText.text(0, it->itemText.length());
	}

	rememberedWords.clear();
	qApp->setOverrideCursor(QCursor(Qt::WaitCursor));

	BreakIterator* bi = StoryText::getWordIterator();
	bi->setText((const UChar*) text.utf16());
	int pos = bi->first();
	while (pos != BreakIterator::DONE)
	{
		int firstC = pos;
		pos = bi->next();
		int lastC = pos;
		int countC = lastC - firstC;

		const CharStyle& style = it->itemText.charStyle(firstC);
		if (countC > 0 && countC > style.hyphenWordMin() - 1)
		{
			QString word = text.mid(firstC, countC);
			QString wordLower = QLocale(style.language()).toLower(word);
			if (wordLower.contains(SpecialChars::SHYPHEN))
				break;

			bool ok = loadDict(style.language());
			if (!ok)
				continue;

			QByteArray te = m_codec->fromUnicode(wordLower);
			char *buffer = static_cast<char*>(malloc(te.length() + 5));
			if (buffer == nullptr)
				break;

			char **rep = nullptr;
			int *pos = nullptr;
			int *cut = nullptr;
			// TODO: support non-standard hyphenation, see hnj_hyphen_hyphenate2 docs
			if (!hnj_hyphen_hyphenate2(m_hdict, te.data(), te.length(), buffer, nullptr, &rep, &pos, &cut))
			{
	  			int i = 0;
				buffer[te.length()] = '\0';
				bool hasHyphen = false;
				for (i = 1; i < wordLower.length()-1; ++i)
				{
					if(buffer[i] & 1)
					{
						hasHyphen = true;
						break;
					}
				}
				QString outs = "";
				QString input = "";
				outs += word[0];
				for (i = 1; i < wordLower.length()-1; ++i)
				{
					outs += word[i];
					if(buffer[i] & 1)
						outs += "-";
				}
				outs += word.rightRef(1);
				input = outs;
				if (!ignoredWords.contains(word))
				{
					if (!hasHyphen)
						it->itemText.hyphenateWord(startC + firstC, wordLower.length(), nullptr);
					else if (m_automatic)
					{
						if (specialWords.contains(word))
						{
							outs = specialWords.value(word);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
						}
						it->itemText.hyphenateWord(startC + firstC, wordLower.length(), buffer);
					}
					else
					{
						if (specialWords.contains(word))
						{
							outs = specialWords.value(word);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
						}
						if (rememberedWords.contains(input))
						{
							outs = rememberedWords.value(input);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
							it->itemText.hyphenateWord(firstC, wordLower.length(), buffer);
						}
						else
						{
							qApp->changeOverrideCursor(QCursor(Qt::ArrowCursor));
							PrefsContext* prefs = PrefsManager::instance()->prefsFile->getContext("hyhpen_options");
							int xpos = prefs->getInt("Xposition", -9999);
							int ypos = prefs->getInt("Yposition", -9999);
							HyAsk *dia = new HyAsk((QWidget*)parent(), outs);
							if ((xpos != -9999) && (ypos != -9999))
								dia->move(xpos, ypos);
							qApp->processEvents();
							if (dia->exec())
							{
								outs = dia->Wort->text();
								uint ii = 1;
								for (i = 1; i < outs.length()-1; ++i)
								{
									QChar cht = outs[i];
									if (cht == '-')
										buffer[ii-1] = 1;
									else
									{
										buffer[ii] = 0;
										++ii;
									}
								}
								if (!rememberedWords.contains(input))
									rememberedWords.insert(input, outs);
								if (dia->addToIgnoreList->isChecked())
								{
									if (!ignoredWords.contains(word))
										ignoredWords.insert(word);
								}
								if (dia->addToExceptionList->isChecked())
								{
									if (!specialWords.contains(word))
										specialWords.insert(word, outs);
								}
								it->itemText.hyphenateWord(firstC, wordLower.length(), buffer);
							}
							else
							{
								free(buffer);
								buffer = nullptr;
								prefs->set("Xposition", dia->xpos);
								prefs->set("Yposition", dia->ypos);
								delete dia;
								break;
							}
							prefs->set("Xposition", dia->xpos);
							prefs->set("Yposition", dia->ypos);
							delete dia;
							qApp->changeOverrideCursor(QCursor(Qt::WaitCursor));
						}
					}
				}
			}
			free(buffer);
			if (rep)
			{
				for (int i = 0; i < te.length() - 1; ++i)
					free(rep[i]);
			}
			free(rep);
			free(pos);
			free(cut);
		}
	}
	qApp->restoreOverrideCursor();
	m_doc->DoDrawing = true;
	rememberedWords.clear();
}
Esempio n. 5
0
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<bool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  memset(aHyphens.Elements(), false, aHyphens.Length());

  bool inWord = false;
  uint32_t wordStart = 0, wordLimit = 0;
  uint32_t chLen;
  for (uint32_t i = 0; i < aString.Length(); i += chLen) {
    uint32_t ch = aString[i];
    chLen = 1;

    if (NS_IS_HIGH_SURROGATE(ch)) {
      if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        chLen = 2;
      } else {
        NS_WARNING("unpaired surrogate found during hyphenation");
      }
    }

    nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = true;
        wordStart = i;
      }
      wordLimit = i + chLen;
      if (i + chLen < aString.Length()) {
        continue;
      }
    }

    if (inWord) {
      const PRUnichar *begin = aString.BeginReading();
      NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
                                 wordLimit - wordStart);
      nsAutoTArray<char,200> utf8hyphens;
      utf8hyphens.SetLength(utf8.Length() + 5);
      char **rep = nullptr;
      int *pos = nullptr;
      int *cut = nullptr;
      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nullptr,
                                      &rep, &pos, &cut);
      if (!err) {
        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
        // from utf8 code unit indexing (which would match the utf8 input
        // string directly) to Unicode character indexing.
        // We then need to convert this to utf16 code unit offsets for Gecko.
        const char *hyphPtr = utf8hyphens.Elements();
        const PRUnichar *cur = begin + wordStart;
        const PRUnichar *end = begin + wordLimit;
        while (cur < end) {
          if (*hyphPtr & 0x01) {
            aHyphens[cur - begin] = true;
          }
          cur++;
          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
              NS_IS_HIGH_SURROGATE(*(cur-1)))
          {
            cur++;
          }
          hyphPtr++;
        }
      }
    }
    
    inWord = false;
  }

  return NS_OK;
}
Esempio n. 6
0
void Hyphenator::slotHyphenate(PageItem* it)
{
	if ((!m_usable) || !(it->asTextFrame()) || (it->itemText.length() == 0))
		return;
	m_doc->DoDrawing = false;

	const char *word;
	char *buffer;
	const int BORDER = 2;
	QString text = "";
	QString buf;
	QByteArray te;

	int startC = 0;
	if (it->itemText.lengthOfSelection() > 0)
	{
		startC = it->itemText.startOfSelection();
		text = it->itemText.text(startC, it->itemText.lengthOfSelection());
	}
	else {
		text = it->itemText.text(0, it->itemText.length());
	}
	int firstC = 0;
	int lastC = 0;
	int Ccount = 0;
	QString found = "";
	QString found2 = "";
	rememberedWords.clear();
	//uint maxC = it->itemText.length() - 1;
	qApp->setOverrideCursor(QCursor(Qt::WaitCursor));
	QRegExp wordBoundary("\\w");
	QRegExp whiteSpace("\\s|\\W|\\d|\\n|\\r|\\t");
	while ((firstC+Ccount < signed(text.length())) && (firstC != -1) && 
			(lastC < signed(text.length())))
	{
		firstC = text.indexOf(wordBoundary, firstC+Ccount);
		if (firstC < 0)
			break;
		if (firstC > 0 && text.at(firstC-1) == SpecialChars::SHYPHEN)
		{
			Ccount = 1;
			continue;
		}
		lastC = text.indexOf(whiteSpace, firstC);
		if (lastC < 0)
			lastC = signed(text.length());
		Ccount = lastC - firstC;
		if (lastC < signed(text.length()) && text.at(lastC) == SpecialChars::SHYPHEN)
		{
			++Ccount;
			continue;
		}
		if (Ccount > MinWordLen-1)
		{
			found = text.mid(firstC, Ccount).toLower();
			found2 = text.mid(firstC, Ccount);
			if (found.contains(SpecialChars::SHYPHEN))
				break;

			NewDict(it->itemText.charStyle(firstC).language());

  			te = m_codec->fromUnicode( found );
			word = te.data();
			int wordlen = strlen(word);
			buffer = static_cast<char*>(malloc(wordlen+BORDER+3));
			if (buffer == NULL)
				break;
			char ** rep = NULL;
			int * pos = NULL;
			int * cut = NULL;
			if (!hnj_hyphen_hyphenate2(m_hdict, word, wordlen, buffer, NULL, &rep, &pos, &cut))
			{
	  			int i = 0;
  				buffer[wordlen] = '\0';
				bool hasHyphen = false;
				for (i = 1; i < found.length()-1; ++i)
				{
					if(buffer[i] & 1)
					{
						hasHyphen = true;
						break;
					}
				}
				QString outs = "";
				QString input = "";
				outs += found2[0];
				for (i = 1; i < found.length()-1; ++i)
				{
					outs += found2[i];
					if(buffer[i] & 1)
						outs += "-";
				}
				outs += found2.right(1);
				input = outs;
				if (!ignoredWords.contains(found2))
				{
					if (!hasHyphen)
						it->itemText.hyphenateWord(startC + firstC, found.length(), NULL);
					else if (Automatic)
					{
						if (specialWords.contains(found2))
						{
							outs = specialWords.value(found2);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
						}
						it->itemText.hyphenateWord(startC + firstC, found.length(), buffer);
					}
					else
					{
						if (specialWords.contains(found2))
						{
							outs = specialWords.value(found2);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
						}
						if (rememberedWords.contains(input))
						{
							outs = rememberedWords.value(input);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
							it->itemText.hyphenateWord(firstC, found.length(), buffer);
						}
						else
						{
							qApp->changeOverrideCursor(QCursor(Qt::ArrowCursor));
							PrefsContext* prefs = PrefsManager::instance()->prefsFile->getContext("hyhpen_options");
							int xpos = prefs->getInt("Xposition", -9999);
							int ypos = prefs->getInt("Yposition", -9999);
							HyAsk *dia = new HyAsk((QWidget*)parent(), outs);
							if ((xpos != -9999) && (ypos != -9999))
								dia->move(xpos, ypos);
							qApp->processEvents();
							if (dia->exec())
							{
								outs = dia->Wort->text();
								uint ii = 1;
								for (i = 1; i < outs.length()-1; ++i)
								{
									QChar cht = outs[i];
									if (cht == '-')
										buffer[ii-1] = 1;
									else
									{
										buffer[ii] = 0;
										++ii;
									}
								}
								if (!rememberedWords.contains(input))
									rememberedWords.insert(input, outs);
								if (dia->addToIgnoreList->isChecked())
								{
									if (!ignoredWords.contains(found2))
										ignoredWords.insert(found2);
								}
								if (dia->addToExceptionList->isChecked())
								{
									if (!specialWords.contains(found2))
										specialWords.insert(found2, outs);
								}
								it->itemText.hyphenateWord(firstC, found.length(), buffer);
							}
							else
							{
								free(buffer);
								buffer = NULL;
								prefs->set("Xposition", dia->xpos);
								prefs->set("Yposition", dia->ypos);
								delete dia;
								break;
							}
							prefs->set("Xposition", dia->xpos);
							prefs->set("Yposition", dia->ypos);
							delete dia;
							qApp->changeOverrideCursor(QCursor(Qt::WaitCursor));
						}
					}
				}
			}
			free(buffer);
			if (rep)
			{
				for (int i = 0; i < wordlen - 1; ++i)
					if (rep[i])
						free(rep[i]);
				free(rep);
			}
			if (pos) free(pos);
			if (cut) free(cut);
			buffer = NULL;
			rep = NULL;
			pos = NULL;
			cut = NULL;
		}
		if (Ccount == 0)
			Ccount++;
	}
	qApp->restoreOverrideCursor();
	m_doc->DoDrawing = true;
	rememberedWords.clear();
}
Esempio n. 7
0
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<PRPackedBool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  memset(aHyphens.Elements(), PR_FALSE, aHyphens.Length());

  PRBool inWord = PR_FALSE;
  PRUint32 wordStart = 0, wordLimit = 0;
  for (PRUint32 i = 0; i < aString.Length(); i++) {
    PRUnichar ch = aString[i];

    nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = PR_TRUE;
        wordStart = i;
      }
      wordLimit = i + 1;
      if (i < aString.Length() - 1) {
        continue;
      }
    }

    if (inWord) {
      NS_ConvertUTF16toUTF8 utf8(aString.BeginReading() + wordStart,
                                 wordLimit - wordStart);
      nsAutoTArray<char,200> utf8hyphens;
      utf8hyphens.SetLength(utf8.Length() + 5);
      char **rep = nsnull;
      int *pos = nsnull;
      int *cut = nsnull;
      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nsnull,
                                      &rep, &pos, &cut);
      if (!err) {
        PRUint32 utf16offset = wordStart;
        const char *cp = utf8.BeginReading();
        while (cp < utf8.EndReading()) {
          if (UTF8traits::isASCII(*cp)) { // single-byte utf8 char
            cp++;
            utf16offset++;
          } else if (UTF8traits::is2byte(*cp)) { // 2-byte sequence
            cp += 2;
            utf16offset++;
          } else if (UTF8traits::is3byte(*cp)) { // 3-byte sequence
            cp += 3;
            utf16offset++;
          } else { // must be a 4-byte sequence (no need to check validity,
                   // as this was just created with NS_ConvertUTF16toUTF8)
            NS_ASSERTION(UTF8traits::is4byte(*cp), "unexpected utf8 byte");
            cp += 4;
            utf16offset += 2;
          }
          NS_ASSERTION(cp <= utf8.EndReading(), "incomplete utf8 string?");
          NS_ASSERTION(utf16offset <= aString.Length(), "length mismatch?");
          if (utf8hyphens[cp - utf8.BeginReading() - 1] & 0x01) {
            aHyphens[utf16offset - 1] = PR_TRUE;
          }
        }
      }
    }
    
    inWord = PR_FALSE;
  }

  return NS_OK;
}