C++ (Cpp) Xml::getNumNodes Examples

Programming Language: C++ (Cpp)

Class/Type: Xml

Method/Function: getNumNodes

Examples at hotexamples.com: 2

C++ (Cpp) Xml::getNumNodes - 2 examples found. These are the top rated real world C++ (Cpp) examples of Xml::getNumNodes extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

parse(30)

s2(30)

s1(30)

etag(30)

unknown(30)

stag(30)

tag(30)

exists(23)

tagE(19)

getFirstChild(18)

getAttribute(17)

canWrite(16)

getNextChild(16)

put(15)

nput(14)

parseInt(12)

intTag(11)

set(11)

attach(10)

parse1(9)

getBooleanAttribute(8)

GetRoot(7)

skip(6)

serialize(6)

spannerId(6)

Read(5)

getText(4)

getProperty(4)

strTag(4)

Write(4)

getIntProperty(4)

Create(4)

header(3)

setArg(3)

get_key(3)

loadFile(2)

sTag(2)

init(2)

setData(2)

writeHtml(2)

getTagContent(2)

getChildNode(2)

getBooleanProperty(2)

IsValid(2)

findChildByPathAttr(2)

getIntAttribute(2)

close(2)

doubleTag(2)

getNumNodes(2)

getNodes(2)

Example #1

Show file

File: DataFeed.cpp Project: BILObilo/open-source-search-engine

// Parses a data feed page and copies the values of its meta tags into this
// DataFeed.
// . dataFeedPage    : the page text handed to Xml::set()
// . dataFeedPageLen : length of dataFeedPage as passed to Xml::set()
// . every node whose id is 68 (the meta tag, per the check below) has its
//   "name" and "content" fields run through utf16ToLatin1(); the name then
//   selects which member receives the content
// . each "tiermax", "resultlevel" and "levelcost" tag fills the next free
//   slot of its price table array, in page order
// . unrecognized names are logged at LOG_INFO and otherwise ignored
void DataFeed::parse ( char *dataFeedPage,
		       long  dataFeedPageLen ) {
	// id that marks a node as a meta tag
	const long metaNodeId = 68;
	// capacities handed to utf16ToLatin1(); the buffers themselves carry
	// one extra byte so they can always be NUL terminated
	const long tagBufSize = 256;
	const long conBufSize = 1024;
	// use Xml Class to parse up the page
	Xml xml;
	xml.set ( csUTF8, dataFeedPage, dataFeedPageLen, false, 0, false,
		  TITLEREC_CURRENT_VERSION );
	// get the nodes
	long numNodes  = xml.getNumNodes();
	XmlNode *nodes = xml.getNodes();
	// to count the tiers, result levels, and level costs
	// NOTE(review): nothing visible here bounds these indices against the
	// size of the price table arrays -- confirm a page cannot carry more
	// of these tags than the arrays can hold
	long currTier = 0;
	long currResultLevel = 0;
	long currLevelCost = 0;
	// pull out the keywords for the data feed
	for (long i = 0; i < numNodes; i++) {
		// skip if this isn't a meta tag, shouldn't happen
		if (nodes[i].m_nodeId != metaNodeId)
			continue;
		// get the meta tag name
		long  ucTagLen = 0;
		char *ucTag = xml.getString(i, "name", &ucTagLen);
		// skip if there is no name. the local "tag" buffer is an array
		// and can never be NULL, so the pointer worth testing is the
		// one getString() returned
		if (!ucTag || ucTagLen <= 0)
			continue;
		// the length is halved to get the UChar count
		// (presumably getString() reports bytes -- confirm)
		char tag[tagBufSize + 1];
		long tagLen = utf16ToLatin1 ( tag, tagBufSize,
					      (UChar*)ucTag, ucTagLen>>1 );
		// skip if empty, or if the reported length cannot fit
		if (tagLen <= 0 || tagLen > tagBufSize)
			continue;
		// the "%s" in the log call below needs a terminator
		tag[tagLen] = '\0';
		// get the content
		long  ucConLen = 0;
		char *ucCon = xml.getString(i, "content", &ucConLen);
		if (!ucCon || ucConLen <= 0)
			continue;
		char con[conBufSize + 1];
		long conLen = utf16ToLatin1 ( con, conBufSize,
					      (UChar*)ucCon, ucConLen>>1 );
		if (conLen <= 0 || conLen > conBufSize)
			continue;
		// atoll()/atol()/atoi() below stop only at a non-digit or NUL
		con[conLen] = '\0';
		// match the meta tag to its local var and copy content
		if (tagLen == 10 && strncasecmp(tag, "customerid", 10) == 0)
			m_customerId = atoll(con);
		else if (tagLen == 11 && strncasecmp(tag, "datafeedurl", 11) == 0)
			setUrl(con, conLen);
		else if (tagLen == 8 && strncasecmp(tag, "passcode", 8) == 0)
			m_passcodeLen = setstr(m_passcode, MAX_PASSCODELEN, con, conLen);
		else if (tagLen == 6 && strncasecmp(tag, "status", 6) == 0)
			m_isActive = (atoi(con) != 0);
		else if (tagLen == 6 && strncasecmp(tag, "locked", 6) == 0)
			m_isLocked = (atoi(con) != 0);
		else if (tagLen == 14 &&
			 strncasecmp(tag, "dfcreationtime", 14) == 0)
			m_creationTime = atol(con);
		else if (tagLen == 8 && strncasecmp(tag, "numtiers", 8) == 0)
			m_priceTable.m_numTiers = atol(con);
		else if (tagLen == 15 && strncasecmp(tag, "numresultlevels", 15) == 0)
			m_priceTable.m_numResultLevels = atol(con);
		else if (tagLen == 10 && strncasecmp(tag, "monthlyfee", 10) == 0)
			m_priceTable.m_monthlyFee = atol(con);
		else if (tagLen == 7 && strncasecmp(tag, "tiermax", 7) == 0) {
			m_priceTable.m_tierMax[currTier] = (unsigned long)atol(con);
			currTier++;
		}
		else if (tagLen == 11 && strncasecmp(tag, "resultlevel", 11) == 0) {
			m_priceTable.m_resultLevels[currResultLevel] = (unsigned long)atol(con);
			currResultLevel++;
		}
		else if (tagLen == 9 && strncasecmp(tag, "levelcost", 9) == 0) {
			m_priceTable.m_levelCosts[currLevelCost] = (unsigned long)atol(con);
			currLevelCost++;
		}
		else
			log(LOG_INFO, "datafeed: Invalid Meta Tag Parsed [%li]:"
			    " %s", tagLen, tag);
	}
}

Example #2

Show file

File: fctypes.cpp Project: privacore/open-source-search-engine

// . filters the html tags of "content" and returns the length of what is left
// . returns contentLen untouched (after logging a warning) if "strip" is 0
// . returns 0 if "content" is NULL or "contentLen" is 0
// . returns -1 if Xml::set() fails (presumably with g_errno set by it --
//   not visible here)
// . the kept nodes are packed, in order, into the buffer returned by
//   tmpXml.getContent(), which is then NUL terminated
//   NOTE(review): presumably that is the caller's own buffer -- confirm
// . "strip" selects the filter:
//   4   = drop script tags and every id-0 node between a pair of them
//   3   = drop every tag except img, plus id-0 nodes between script/style
//   1/2 = use g_nodes[id].m_filterKeep1 / m_filterKeep2 for each tag;
//         with 2 a <link rel=stylesheet> is always kept
// . img tags that are kept are replaced by their alt (or title) text
int32_t stripHtml( char *content, int32_t contentLen, int32_t version, int32_t strip ) {
	if ( !strip ) {
		log( LOG_WARN, "query: html stripping not required!" );
		return contentLen;
	}
	if ( ! content )
		return 0;
	if ( contentLen == 0 )
		return 0;

	// filter content if we should
	// keep this on the big stack so "content" still references something
	Xml tmpXml;
	// . get the content as xhtml (should be NULL terminated)
	// . parse as utf8 since all we are doing is messing with 
	//   the tags...content manipulation comes later
	if ( !tmpXml.set( content, contentLen, version, CT_HTML ) ) {
		return -1;
	}

	//if( strip == 4 )
	//	return tmpXml.getText( content, contentLen );

	// go tag by tag
	int32_t     n       = tmpXml.getNumNodes();
	XmlNode *nodes   = tmpXml.getNodes();
	// Xml class may have converted to utf16
	content    = tmpXml.getContent();
	contentLen = tmpXml.getContentLen();
	// x is the write cursor, xend is one past the last writable byte
	char    *x       = content;
	char    *xend    = content + contentLen;
	// id of the tag whose front/back pair we are currently dropping
	// (-1 if none) and how deeply that tag is nested right now
	int32_t     stackid = -1;
	int32_t     stackc  =  0;
	// toggled by each script (and, for strip 3, style) tag we pass
	char     skipIt  =  0;
	// . hack COL tag to NOT require a back tag
	// . do not leave it that way as it could mess up our parsing
	//g_nodes[25].m_hasBackTag = 0;
	for ( int32_t i = 0 ; i < n ; i++ ) {
		// get id of this node
		int32_t id = nodes[i].m_nodeId;
		
		// if strip is 4, just remove the script tag
		if( strip == 4 ){
			if ( id ){
				if ( id == TAG_SCRIPT ){
					skipIt ^= 1;
					continue;
				}
			}
			else if ( skipIt ) continue;
			goto keepit;
		}
		
		// if strip is 3, ALL tags will be removed!
		if( strip == 3 ) {
			if( id ) {
				// . we dont want anything in between:
				//   - script tags (83)
				//   - style tags  (111)
				if ((id == TAG_SCRIPT) || (id == TAG_STYLE)) skipIt ^= 1;
				// save img to have alt text kept.
				if ( id == TAG_IMG  ) goto keepit;
				continue;
			}
			else {
				if( skipIt ) continue;
				goto keepit;
			}
		}
		// get it
		// (declared without an initializer on purpose: the gotos above
		//  jump over this declaration)
		int32_t fk;
		if   ( strip == 1 ) fk = g_nodes[id].m_filterKeep1;
		else                fk = g_nodes[id].m_filterKeep2;
		// if tag is <link ...> only keep it if it has
		// rel="stylesheet" or rel=stylesheet
		if ( strip == 2 && id == TAG_LINK ) { // <link> tag id
			int32_t   fflen;
			char *ff = nodes[i].getFieldValue ( "rel" , &fflen );
			if ( ff && fflen == 10 &&
			     strncmp(ff,"stylesheet",10) == 0 )
				goto keepit;
		}
		// just remove just the tag if this is 2
		if ( fk == 2 ) continue;
		// keep it if not in a stack
		if ( ! stackc && fk ) goto keepit;
		// if no front/back for tag, just skip it
		if ( ! nodes[i].m_hasBackTag ) continue;
		// start stack if none
		if ( stackc == 0 ) {
			// but not if this is a back tag
			if ( nodes[i].m_node[1] == '/' ) continue;
			// now start the stack
			stackid = id;
			stackc  =  1;
			continue;
		}
		// skip if this tag does not match what is on stack
		if ( id != stackid ) continue;
		// if ANOTHER front tag, inc stack
		if ( nodes[i].m_node[1] != '/' ) stackc++;
		// otherwise, dec the stack count
		else                             stackc--;
		// . ensure not negative from excess back tags
		// . reset stackid to -1 to indicate no stack
		if ( stackc <= 0 ) { stackid= -1; stackc = 0; }
		// skip it
		continue;
	keepit:
		// replace images with their alt text
		int32_t vlen;
		char *v;
		if ( id == TAG_IMG ) {
			v = nodes[i].getFieldValue("alt", &vlen );
			// try title if no alt text
			if ( ! v )
				v = nodes[i].getFieldValue("title", &vlen );
			if ( v ) {
				// sanity check before we write, not after
				if ( vlen > xend - x ) { g_process.shutdownAbort(true);}
				gbmemcpy ( x, v, vlen );
				x += vlen;
			}
			continue;
		}
		// remove background image from body,table,td tags
		if ( id == TAG_BODY || id == TAG_TABLE || id == TAG_TD ) {
			v = nodes[i].getFieldValue("background", &vlen);
			// remove background, just sabotage it by overwriting
			// a byte shortly before the value, which corrupts the
			// attribute name
			if ( v ) v[-4] = 'x';
		}
		// . sanity check
		// . do it BEFORE the copy so a bad node length aborts instead
		//   of first writing past xend
		if ( nodes[i].m_nodeLen > xend - x ) { g_process.shutdownAbort(true);}
		// store it
		gbmemcpy ( x , nodes[i].m_node , nodes[i].m_nodeLen );
		x += nodes[i].m_nodeLen;
	}
	contentLen = x - content;
	content [ contentLen ] = '\0';
	// unhack COL tag
	//g_nodes[25].m_hasBackTag = 1;
	return contentLen;
}