Example #1
HRESULT ElementRatings::ValidateXML(const CStdString& sXML, const CStdString& sXSD)
{
	try
	{
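		// Write the schema to a temporary file once and cache the path for later calls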
		if(m_sSchemaFilePath.IsEmpty())
		{
			m_sSchemaFilePath = CGeneral::GetTemporaryFileNameWithExtension(_T("xsd"));

			CTextFileWrite docWriter(m_sSchemaFilePath.c_str(), CTextFileBase::UTF_8);
			docWriter.Write(sXSD.c_str());
			docWriter.Close();
		}

		if(m_pXMLSchemaDoc == NULL)
		{
			m_pXMLSchemaDoc.CreateInstance(_T("Msxml2.DOMDocument.6.0"));
			m_pXMLSchemaDoc->async = VARIANT_FALSE;

			MSXML2::IXMLDOMSchemaCollection2Ptr pSchemas;
			pSchemas.CreateInstance(_T("Msxml2.XMLSchemaCache.6.0"));
			pSchemas->add(_T(""), _variant_t(m_sSchemaFilePath.c_str()));

			// Attach the schema cache as a VT_DISPATCH variant
			VARIANT varValue;
			varValue.vt = VT_DISPATCH;
			varValue.pdispVal = pSchemas;
			m_pXMLSchemaDoc->PutRefschemas(varValue);
		}

		// Load the document on every call, not only on first use, so repeated
		// calls do not validate stale XML; loadXML returns VARIANT_TRUE on success
		if(m_pXMLSchemaDoc->loadXML(sXML.c_str()) != VARIANT_TRUE)
		{
			LOG_WS_ERROR(_T("The xml could not be parsed."));
			return E_FAIL;
		}

		// validate() returns S_FALSE (a success HRESULT) for an invalid document,
		// so checking FAILED() alone would miss validation errors
		MSXML2::IXMLDOMParseErrorPtr pError = NULL;
		if(m_pXMLSchemaDoc->raw_validate(&pError) != S_OK)
		{
			CStdString sErr;
			sErr.Format(_T("The xml did not conform to the specified schema. Error: %s"), (LPCTSTR)pError->reason);
			LOG_WS_ERROR(sErr.c_str());
			return E_FAIL;
		}
	}
	catch(_com_error& e)
	{
		CStdString sInfo = _T("Validation failed: ") + CStdString(e.ErrorMessage());
		LOG_WS_ERROR(sInfo.c_str());
		return e.Error();
	}
	return S_OK;
}
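
A minimal caller sketch for the validator above, assuming COM has already been initialized on the calling thread (CoInitialize/CoInitializeEx); ReadFileToString is a hypothetical helper standing in for however the caller obtains the two documents:

// Hypothetical usage sketch - ReadFileToString is illustrative only
ElementRatings ratings;
CStdString sXML = ReadFileToString(_T("ratings.xml"));	// hypothetical helper
CStdString sXSD = ReadFileToString(_T("ratings.xsd"));	// hypothetical helper

if(FAILED(ratings.ValidateXML(sXML, sXSD)))
{
	// the failure reason has already been logged inside ValidateXML
}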
    void DocInverterPerField::processFields(Collection<FieldablePtr> fields, int32_t count)
    {
        fieldState->reset(docState->doc->getBoost());
        
        int32_t maxFieldLength = docState->maxFieldLength;
        bool doInvert = consumer->start(fields, count);
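        // docState->_docWriter and _perThread are weak references; constructing
        // shared pointers here keeps the underlying objects alive for this call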
        DocumentsWriterPtr docWriter(docState->_docWriter);
        DocInverterPerThreadPtr perThread(_perThread);
        
        for (int32_t i = 0; i < count; ++i)
        {
            FieldablePtr field = fields[i];
            if (field->isIndexed() && doInvert)
            {
                bool anyToken;
                
                if (fieldState->length > 0)
                    fieldState->position += docState->analyzer->getPositionIncrementGap(fieldInfo->name);
                
                if (!field->isTokenized())
                {
                    // un-tokenized field
                    String stringValue(field->stringValue());
                    int32_t valueLength = (int32_t)stringValue.length();
                    perThread->singleToken->reinit(stringValue, 0, valueLength);
                    fieldState->attributeSource = perThread->singleToken;
                    consumer->start(field);

                    bool success = false;
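                    // Emulate Java's try/finally: capture any exception, run the
                    // cleanup below, then re-throw it via throwException()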
                    LuceneException finally;
                    try
                    {
                        consumer->add();
                        success = true;
                    }
                    catch (LuceneException& e)
                    {
                        finally = e;
                    }
                    if (!success)
                        docWriter->setAborting();
                    finally.throwException();
                    fieldState->offset += valueLength;
                    ++fieldState->length;
                    ++fieldState->position;
                    anyToken = (valueLength > 0);
                }
                else
                {
                    // tokenized field
                    TokenStreamPtr stream;
                    TokenStreamPtr streamValue(field->tokenStreamValue());
                    
                    if (streamValue)
                        stream = streamValue;
                    else
                    {
                        // the field does not have a TokenStream, so we have to obtain one from the analyzer
                        ReaderPtr reader; // find or make Reader
                        ReaderPtr readerValue(field->readerValue());
                        
                        if (readerValue)
                            reader = readerValue;
                        else
                        {
                            String stringValue(field->stringValue());
                            perThread->stringReader->init(stringValue);
                            reader = perThread->stringReader;
                        }
                        
                        // Tokenize field and add to postingTable
                        stream = docState->analyzer->reusableTokenStream(fieldInfo->name, reader);
                    }
                    
                    // reset the TokenStream to the first token
                    stream->reset();
                    
                    int32_t startLength = fieldState->length;
                    
                    LuceneException finally;
                    try
                    {
                        int32_t offsetEnd = fieldState->offset - 1;
                        
                        bool hasMoreTokens = stream->incrementToken();
                        
                        fieldState->attributeSource = stream;
                        
                        OffsetAttributePtr offsetAttribute(fieldState->attributeSource->addAttribute<OffsetAttribute>());
                        PositionIncrementAttributePtr posIncrAttribute(fieldState->attributeSource->addAttribute<PositionIncrementAttribute>());
                        
                        consumer->start(field);
                    
                        while (true)
                        {
                            // If we hit an exception in stream->incrementToken() below (which is
                            // fairly common, e.g. if the analyzer chokes on a given document), then
                            // it's non-aborting and (above) this one document will be marked as
                            // deleted, but will still consume a docID
                            if (!hasMoreTokens)
                                break;
                            
                            int32_t posIncr = posIncrAttribute->getPositionIncrement();
                            fieldState->position += posIncr;
                            if (fieldState->position > 0)
                                --fieldState->position;
                            
                            if (posIncr == 0)
                                ++fieldState->numOverlap;
                            
                            bool success = false;
                            try
                            {
                                // If we hit an exception in here, we abort all buffered documents since the last
                                // flush, on the likelihood that the internal state of the consumer is now corrupt 
                                // and should not be flushed to a new segment
                                consumer->add();
                                success = true;
                            }
                            catch (LuceneException& e)
                            {
                                finally = e;
                            }
                            if (!success)
                                docWriter->setAborting();
                            finally.throwException();
                            ++fieldState->position;
                            offsetEnd = fieldState->offset + offsetAttribute->endOffset();
                            if (++fieldState->length >= maxFieldLength)
                            {
                                if (docState->infoStream)
                                    *docState->infoStream << L"maxFieldLength " << StringUtils::toString(maxFieldLength) << L" reached for field " << fieldInfo->name << L", ignoring following tokens\n";
                                break;
                            }
                            
                            hasMoreTokens = stream->incrementToken();
                        }
                        
                        // trigger streams to perform end-of-stream operations
                        stream->end();
                        
                        fieldState->offset += offsetAttribute->endOffset();
                        anyToken = (fieldState->length > startLength);
                    }
                    catch (LuceneException& e)
                    {
                        finally = e;
                    }
                    stream->close();
                    finally.throwException();
                }
                
                if (anyToken)
                    fieldState->offset += docState->analyzer->getOffsetGap(field);
                fieldState->boost *= field->getBoost();
            }
            
            // don't hang onto the field
            fields[i].reset();
        }
        
        consumer->finish();
        endConsumer->finish();
    }
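
The repeated "LuceneException finally;" objects above are how Lucene++ emulates Java's try/finally. A stripped-down sketch of the pattern, with doWork and cleanup as hypothetical stand-ins:

    // Sketch of the try/catch "finally" emulation used in processFields
    void example()
    {
        LuceneException finally;
        try
        {
            doWork(); // may throw LuceneException (hypothetical work)
        }
        catch (LuceneException& e)
        {
            finally = e; // capture instead of propagating immediately
        }
        cleanup(); // always runs, like a Java finally block (hypothetical cleanup)
        finally.throwException(); // re-throws any captured exception; no-op otherwise
    }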