/*!
  Guess the encoding of a text buffer and make it the importer's encoding
  \param szBuf Buffer to examine
  \param iNumbytes Length of the buffer
  \return Always UT_OK

  UTF-8 is tried first, then UCS-2 (big and little endian). When neither
  is recognised the encoding falls back to the native 8-bit encoding on
  TOOLKIT_WIN builds and to ISO-8859-1 everywhere else.
  CJK encodings could be added
 */
UT_Error IE_Imp_Text::_recognizeEncoding(const char *szBuf, UT_uint32 iNumbytes)
{
	// UTF-8 wins outright; nothing else needs to be probed.
	if (IE_Imp_Text_Sniffer::_recognizeUTF8(szBuf, iNumbytes))
	{
		_setEncoding("UTF-8");
		return UT_OK;
	}

	const IE_Imp_Text_Sniffer::UCS2_Endian eEndian
		= IE_Imp_Text_Sniffer::_recognizeUCS2(szBuf, iNumbytes, false);

	switch (eEndian)
	{
	case IE_Imp_Text_Sniffer::UE_BigEnd:
		_setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName());
		break;

	case IE_Imp_Text_Sniffer::UE_LittleEnd:
		_setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName());
		break;

	default:
		// Not UCS-2 either: use the platform dependent 8-bit fallback.
#ifdef TOOLKIT_WIN
		_setEncoding(XAP_EncodingManager::get_instance()->getNative8BitEncodingName());
#else
		_setEncoding("ISO-8859-1");
#endif
		break;
	}

	return UT_OK;
}
void XAP_UnixDialog_Encoding::event_Ok(void) { GtkTreeSelection * selection; GtkTreeIter iter; GtkTreeModel * model; gint row = 0; selection = gtk_tree_view_get_selection( GTK_TREE_VIEW(m_listEncodings) ); // if there is no selection, or the selection's data (GtkListItem widget) // is empty, return cancel. GTK can make this happen. if ( !selection || !gtk_tree_selection_get_selected (selection, &model, &iter) ) { _setAnswer (XAP_Dialog_Encoding::a_CANCEL); return; } // get the ID of the selected Type gtk_tree_model_get (model, &iter, 1, &row, -1); if (row >= 0) { _setSelectionIndex(static_cast<UT_uint32>(row)); _setEncoding (_getAllEncodings()[row]); _setAnswer (XAP_Dialog_Encoding::a_OK); } else { UT_ASSERT_NOT_REACHED(); _setAnswer (XAP_Dialog_Encoding::a_CANCEL); } }
/*!
  Construct text exporter
  \param pDocument Document to export
  \param bEncoded True if the encoding dialog should be shown

  The "always prompt for encoding" preference also turns the dialog on.
  The initial encoding is the document's encoding name when it is set and
  non-empty, otherwise the native encoding.
 */
IE_Exp_Text::IE_Exp_Text(PD_Document * pDocument, bool bEncoded)
	: IE_Exp(pDocument),
	  m_pListener(NULL),
	  m_bIsEncoded(false),
	  m_szEncoding(0),
	  m_bExplicitlySetEncoding(false),
	  m_bIs16Bit(false),
	  m_bUnicode(false),
	  m_bBigEndian(false),
	  m_bUseBOM(false)
{
	m_error = UT_OK;

	// Either the caller or the user preference can request the dialog.
	bool bPromptPref = false;
	XAP_App::getApp()->getPrefsValueBool(AP_PREF_KEY_AlwaysPromptEncoding, &bPromptPref);
	m_bIsEncoded = (bPromptPref || bEncoded);

	// Prefer the document's own encoding; fall back to the native one.
	const char * szName = pDocument->getEncodingName();
	const bool bDocHasEncoding = (szName != NULL) && (*szName != '\0');
	if (!bDocHasEncoding)
	{
		szName = XAP_EncodingManager::get_instance()->getNativeEncodingName();
	}

	_setEncoding(szName);
}
/*!
  Write the document (or the current document range) through a listener
  \return UT_SAVE_CANCELLED if the user cancels the encoding dialog,
          UT_IE_NOMEMORY if no listener could be constructed,
          UT_IE_COULDNOTWRITE if m_error is set afterwards, else UT_OK
 */
UT_Error IE_Exp_Text::_writeDocument(void)
{
	// The dialog is only shown when prompting is enabled and nobody has
	// chosen the encoding explicitly. Cancelling it aborts the export.
	const bool bMustAsk = m_bIsEncoded && !m_bExplicitlySetEncoding;
	if (bMustAsk && !_doEncodingDialog(m_szEncoding))
	{
		return UT_SAVE_CANCELLED;
	}

	// TODO If we're going to the clipboard and the OS supports unicode, set encoding.
	// TODO Only supports Windows so far.
	// TODO Should use a finer-grain technique than IsWinNT() since Win98 supports unicode clipboard.
	if (getDocRange())
	{
#ifdef WIN32
		if (UT_IsWinNT())
		{
			_setEncoding(XAP_EncodingManager::get_instance()->getNativeUnicodeEncodingName());
		}
#endif
	}

	m_pListener = _constructListener();
	if (m_pListener == NULL)
	{
		return UT_IE_NOMEMORY;
	}

	// A document range means a partial export; otherwise the whole
	// document is sent to the listener.
	PL_Listener * pListener = static_cast<PL_Listener *>(m_pListener);
	if (getDocRange())
	{
		getDoc()->tellListenerSubset(pListener, getDocRange());
	}
	else
	{
		getDoc()->tellListener(pListener);
	}

	DELETEP(m_pListener);

	if (m_error)
	{
		return UT_IE_COULDNOTWRITE;
	}
	return UT_OK;
}
/*!
  Create the listener that performs the actual text export
  \return Newly allocated Text_Listener

  Unless the encoding was set explicitly, a non-empty "encoding" property
  replaces the current encoding before the listener is built.
 */
PL_Listener * IE_Exp_Text::_constructListener(void)
{
	if (!m_bExplicitlySetEncoding)
	{
		const std::string & sEncodingProp = getProperty("encoding");
		if (!sEncodingProp.empty())
		{
			_setEncoding(sEncodingProp.c_str());
		}
	}

	// The third argument tells the listener whether a document range is set.
	const bool bHasDocRange = (getDocRange() != NULL);

	return new Text_Listener(getDoc(),
	                         this,
	                         bHasDocRange,
	                         m_szEncoding,
	                         m_bIs16Bit,
	                         m_bUnicode,
	                         m_bUseBOM,
	                         m_bBigEndian);
}
/*!
  Paste a text buffer into the document at the given position
  \param pDocRange Insertion point; must belong to this importer's document
                   and have m_pos1 equal to m_pos2
  \param pData Buffer holding the text
  \param lenData Length of the buffer
  \param szEncoding Encoding of the buffer, or NULL to have it detected
  \return false if a precondition on pDocRange fails, true otherwise
          (the result of parsing the stream is not propagated)
 */
bool IE_Imp_Text::pasteFromBuffer(PD_DocumentRange * pDocRange, const unsigned char * pData, UT_uint32 lenData, const char *szEncoding)
{
	UT_return_val_if_fail(getDoc() == pDocRange->m_pDoc, false);
	UT_return_val_if_fail(pDocRange->m_pos1 == pDocRange->m_pos2, false);

	// Without an encoding from the caller, guess whether the buffer
	// holds 8 bit or unicode text.
	if (!szEncoding)
	{
		_recognizeEncoding(reinterpret_cast<const char *>(pData), lenData);
	}
	else
	{
		_setEncoding(szEncoding);
	}

	ImportStreamClipboard clipStream(pData, lenData);
	setClipboard(pDocRange->m_pos1);
	_parseStream(&clipStream);

	return true;
}
/*!
  Construct text importer with a caller supplied encoding
  \param pDocument Document to import text into
  \param encoding Encoding name; NULL or "" leaves the encoding unset

  A non-empty encoding is applied immediately and marked as explicitly set.
 */
IE_Imp_Text::IE_Imp_Text(PD_Document * pDocument, const char * encoding)
	: IE_Imp(pDocument),
	  m_szEncoding(0),
	  m_bExplicitlySetEncoding(false),
	  m_bIsEncoded(false),
	  m_bIs16Bit(false),
	  m_bUseBOM(false),
	  m_bBigEndian(false),
	  m_bBlockDirectionPending(true),
	  m_bFirstBlockData(true),
	  m_pBlock(0)
{
	const bool bHaveEncoding = (encoding != NULL) && (*encoding != '\0');

	m_bIsEncoded = bHaveEncoding;
	if (!bHaveEncoding)
	{
		return;
	}

	m_bExplicitlySetEncoding = true;
	_setEncoding(encoding);
}
/*!
  Construct text exporter with a caller supplied encoding
  \param pDocument Document to export
  \param encoding Encoding name; NULL or "" leaves the encoding unset

  A non-empty encoding is applied immediately and marked as explicitly set.
 */
IE_Exp_Text::IE_Exp_Text(PD_Document * pDocument, const char * encoding)
	: IE_Exp(pDocument),
	  m_pListener(NULL),
	  m_bIsEncoded(false),
	  m_szEncoding(0),
	  m_bExplicitlySetEncoding(false),
	  m_bIs16Bit(false),
	  m_bUnicode(false),
	  m_bBigEndian(false),
	  m_bUseBOM(false)
{
	m_error = UT_OK;

	const bool bHaveEncoding = (encoding != NULL) && (*encoding != '\0');

	m_bIsEncoded = bHaveEncoding;
	if (!bHaveEncoding)
	{
		return;
	}

	m_bExplicitlySetEncoding = true;
	_setEncoding(encoding);
}
/*! Request file encoding from user This function should be identical to the one in ie_Exp_Text */ bool IE_Imp_Text::_doEncodingDialog(const char *szEncoding) { XAP_Dialog_Id id = XAP_DIALOG_ID_ENCODING; XAP_DialogFactory * pDialogFactory = static_cast<XAP_DialogFactory *>(XAP_App::getApp()->getDialogFactory()); XAP_Dialog_Encoding * pDialog = static_cast<XAP_Dialog_Encoding *>(pDialogFactory->requestDialog(id)); UT_return_val_if_fail(pDialog, false); pDialog->setEncoding(szEncoding); // run the dialog XAP_Frame * pFrame = XAP_App::getApp()->getLastFocussedFrame(); UT_return_val_if_fail(pFrame, false); pDialog->runModal(pFrame); // extract what they did bool bOK = (pDialog->getAnswer() == XAP_Dialog_Encoding::a_OK); if (bOK) { const gchar * s; static gchar szEnc[16]; s = pDialog->getEncoding(); UT_return_val_if_fail (s, false); strcpy(szEnc,s); _setEncoding(static_cast<const char *>(szEnc)); getDoc()->setEncodingName(szEnc); } pDialogFactory->releaseDialog(pDialog); return bOK; }
/*!
  Construct text importer
  \param pDocument Document to import text into
  \param bEncoded True if we should show encoding dialog

  The "always prompt for encoding" preference also turns the dialog on.
  Uses current document's encoding if it is set and non-empty, otherwise
  the native encoding.
 */
IE_Imp_Text::IE_Imp_Text(PD_Document * pDocument, bool bEncoded)
	: IE_Imp(pDocument),
	  m_szEncoding(0),
	  m_bExplicitlySetEncoding(false),
	  m_bIsEncoded(false),
	  m_bIs16Bit(false),
	  m_bUseBOM(false),
	  m_bBigEndian(false),
	  m_bBlockDirectionPending(true),
	  m_bFirstBlockData(true),
	  m_pBlock(NULL)
{
	// Get encoding dialog prefs setting.
	// Initialised so that a lookup which leaves the out-parameter untouched
	// cannot feed an indeterminate value into m_bIsEncoded (this matches
	// the text exporter's constructor).
	bool bAlwaysPrompt = false;
	XAP_App::getApp()->getPrefsValueBool(AP_PREF_KEY_AlwaysPromptEncoding, &bAlwaysPrompt);

	m_bIsEncoded = bAlwaysPrompt || bEncoded;

	// Prefer the document's own encoding; fall back to the native one.
	const char *szEncodingName = pDocument->getEncodingName();
	if (!szEncodingName || !*szEncodingName)
		szEncodingName = XAP_EncodingManager::get_instance()->getNativeEncodingName();

	_setEncoding(szEncodingName);
}
/*!
  Parse stream contents into the document
  \param pStream Stream to import from
  \return UT_ERROR if pStream is NULL, UT_OK once the stream is exhausted;
          a failed insertion returns early through X_ReturnNoMemIfError

  Unless the encoding was set explicitly, a non-empty "encoding" property
  replaces the current encoding before the stream is initialised.

  This code is used for both files and the clipboard
 */
UT_Error IE_Imp_Text::_parseStream(ImportStream * pStream)
{
	UT_return_val_if_fail(pStream, UT_ERROR);

	bool bFirstChar = true;
	UT_GrowBuf gbBlock(1024);
	UT_UCSChar c;

	// Kept at function scope on purpose: _setEncoding() is handed a pointer
	// into this string and m_szEncoding is read again by pStream->init()
	// below, so the string has to outlive that call.
	// NOTE(review): if _setEncoding() keeps the pointer rather than copying,
	// m_szEncoding is still stale once this function returns - confirm.
	std::string sEncodingProp;

	if (!m_bExplicitlySetEncoding)
	{
		sEncodingProp = getProperty("encoding");
		if (!sEncodingProp.empty())
		{
			_setEncoding(sEncodingProp.c_str());
		}
	}

	pStream->init(m_szEncoding);

	while (pStream->getChar(c))
	{
		// TODO We should switch fonts when we encounter
		// TODO characters from different scripts
		switch (c)
		{
		case UCS_CR:
		case UCS_LF:
		case UCS_LINESEP:
		case UCS_PARASEP:
			// we interpret either CRLF, CR, or LF as a paragraph break.
			// we also accept U+2028 (line separator) and U+2029 (para separator)
			// especially since these are recommended by Mac OS X.

			// flush out what we have
			if (gbBlock.getLength() > 0)
				X_ReturnNoMemIfError(_insertSpan(gbBlock));
			X_ReturnNoMemIfError(_insertBlock());
			break;

		case UCS_BOM:
			// This is Byte Order Mark at the start of file, Zero Width Non Joiner elsewhere.
			// The leading mark is dropped; anywhere else the character is
			// kept as it is. It must not fall into the control character
			// cases below, which would turn it into a visible '?'.
			if (!bFirstChar)
			{
				X_ReturnNoMemIfError(gbBlock.append(reinterpret_cast<UT_GrowBufElement*>(&c),1));
			}
			break;

		// if we encounter any of the following characters we will
		// substitute a '?' as they correspond to control characters,
		// though some text files use them for their character representations
		// We do this instead of immediately returning an error
		// (and assuming they have no business in a text file) so we can
		// still show usable text to a user who has one of these files.
		case 0x0000:
		case 0x0001:
		case 0x0002:
		case 0x0003:
		case 0x0004:
		case 0x0005:
		case 0x0006:
		case 0x0007:
		case 0x0008:
		case 0x000e:
		case 0x000f:
		case 0x0010:
		case 0x0011:
		case 0x0012:
		case 0x0013:
		case 0x0014:
		case 0x0015:
		case 0x0016:
		case 0x0017:
		case 0x0018:
		case 0x0019:
		case 0x001a:
		case 0x001b:
		case 0x001c:
		case 0x001d:
		case 0x001e:
		case 0x001f:
			// UT_ASSERT(!(c <= 0x001f));
			c = '?';
			/* return UT_ERROR; // fall through with modified character */

		default:
			X_ReturnNoMemIfError(gbBlock.append(reinterpret_cast<UT_GrowBufElement*>(&c),1));
			break;
		}
		bFirstChar = false;
	}

	// flush whatever text follows the last paragraph break
	if (gbBlock.getLength() > 0)
		X_ReturnNoMemIfError(_insertSpan(gbBlock));

	return UT_OK;
}