// Recursively walks the subtree rooted at `node`. For every text node whose
// parent is a WORD_T element, each space that is seen while
// *havePrecedingSpace is set gets replaced with DFNbspChar, and the node's
// value is rewritten with the result.
//
// node               - root of the subtree to process
// inParagraph        - set to 1 on entering a WORD_P element and forwarded to
//                      the children; it is not otherwise consulted here
// havePrecedingSpace - in/out flag shared across the whole traversal: set when
//                      the last character seen was a space (or a WORD_P element
//                      was just entered), cleared by any non-space character
static void addNbsps(DFNode *node, int inParagraph, int *havePrecedingSpace)
{
    if (node->tag == WORD_P) {
        // Entering a WORD_P element sets the flag, so a space that is the
        // first character encountered afterwards is replaced as well.
        inParagraph = 1;
        *havePrecedingSpace = 1;
    }
    else if ((node->tag == DOM_TEXT) && (node->parent->tag == WORD_T)) {
        uint32_t *codepoints = DFUTF8To32(node->value);
        uint32_t *end = codepoints + DFUTF32Length(codepoints);

        for (uint32_t *cp = codepoints; cp != end; cp++) {
            if (*cp != ' ') {
                *havePrecedingSpace = 0;
                continue;
            }
            // First space of a run is kept as-is; every following one
            // becomes DFNbspChar.
            if (*havePrecedingSpace)
                *cp = DFNbspChar;
            else
                *havePrecedingSpace = 1;
        }

        char *utf8 = DFUTF32to8(codepoints);
        DFSetNodeValue(node,utf8);
        free(utf8);
        free(codepoints);
    }

    DFNode *child = node->first;
    while (child != NULL) {
        addNbsps(child,inParagraph,havePrecedingSpace);
        child = child->next;
    }
}
// Recursively walks the subtree rooted at `node` and, in every text node,
// replaces each DFNbspChar code point with an ordinary space. The value of
// every text node visited is rewritten, whether or not anything was replaced.
static void removeNbsps(DFNode *node)
{
    if (node->tag == DOM_TEXT) {
        uint32_t *codepoints = DFUTF8To32(node->value);
        const size_t count = DFUTF32Length(codepoints);

        size_t idx = 0;
        while (idx < count) {
            if (codepoints[idx] == DFNbspChar)
                codepoints[idx] = ' ';
            idx++;
        }

        char *utf8 = DFUTF32to8(codepoints);
        DFSetNodeValue(node,utf8);
        free(utf8);
        free(codepoints);
    }

    DFNode *child = node->first;
    while (child != NULL) {
        removeNbsps(child);
        child = child->next;
    }
}
// Trims whitespace from the text nodes in the subtree rooted at `node`.
//
// Text nodes: the value is replaced with the result of
// DFStringTrimWhitespace(). If that result is empty and the node has a
// parent, the node is removed from the tree instead.
//
// Other nodes: the children are processed recursively, unless the node is an
// element (tag >= MIN_ELEMENT_TAG) whose XML_SPACE attribute equals
// "preserve", in which case the whole subtree is left untouched.
void DFStripWhitespace(DFNode *node)
{
    if (node->tag == DOM_TEXT) {
        char *trimmed = DFStringTrimWhitespace(node->value);
        int isBlank = (trimmed[0] == '\0');

        if (isBlank && (node->parent != NULL))
            DFRemoveNode(node);
        else
            DFSetNodeValue(node,trimmed);

        free(trimmed);
        return;
    }

    if (node->tag >= MIN_ELEMENT_TAG) {
        const char *space = DFGetAttribute(node,XML_SPACE);
        if ((space != NULL) && (strcmp(space,"preserve") == 0))
            return;
    }

    DFNode *child = node->first;
    while (child != NULL) {
        // Remember the sibling first: the recursive call may remove `child`.
        DFNode *following = child->next;
        DFStripWhitespace(child);
        child = following;
    }
}
// Moves the leading "prefix" part of the subtree rooted at `node` out of the
// tree and into `result`, as a space-separated list of quoted strings and
// counter(...) references (presumably destined for a CSS `content` value --
// confirm against the caller).
//
// node         - subtree to scan; consumed nodes are removed or shortened
// counterName  - name written inside counter(...) when a sequence field is hit
// result       - buffer receiving the extracted prefix items
// foundSeq     - in/out: set once a sequence field (isSeqField) has been seen
// foundContent - in/out: set once the end of the prefix has been reached;
//                callers stop visiting further siblings when it is set
//
// Behaviour per node kind:
//  * Sequence field: emits counter(<counterName>), removes the node.
//  * Text before the sequence field: the whole text is quoted into `result`
//    and the node is removed.
//  * Text after the sequence field: only the leading run of whitespace and
//    punctuation is taken. If the entire text is such a run the node is
//    removed, otherwise the node keeps the remainder. In every case the
//    prefix ends at this text node and *foundContent is set.
//  * Anything else: children are processed in order until *foundContent is
//    set. An HTML_SPAN that had children and ends up empty is removed.
static void extractPrefixRecursive(DFNode *node, const char *counterName, DFBuffer *result,
                                   int *foundSeq, int *foundContent)
{
    if (isSeqField(node)) {
        if (result->len > 0)
            DFBufferFormat(result," ");
        DFBufferFormat(result,"counter(%s)",counterName);
        *foundSeq = 1;
        DFRemoveNode(node);
        return;
    }

    if (node->tag == DOM_TEXT) {
        size_t valueLen = strlen(node->value);
        size_t pos = 0;

        if (*foundSeq) {
            // Find the byte offset of the first character that is neither
            // whitespace nor punctuation (or the end of the string).
            size_t offset = 0;
            uint32_t ch;
            do {
                pos = offset;
                ch = DFNextChar(node->value,&offset);
            } while ((ch != 0) && (DFCharIsWhitespaceOrNewline(ch) || DFCharIsPunctuation(ch)));
        }
        else {
            // Before the sequence field, the entire text belongs to the prefix.
            pos = valueLen;
        }

        if (pos == valueLen) {
            // The whole text node is part of the prefix.
            if (result->len > 0)
                DFBufferFormat(result," ");
            char *quotedValue = DFQuote(node->value);
            DFBufferFormat(result,"%s",quotedValue);
            free(quotedValue);
            DFRemoveNode(node);
            if (*foundSeq)
                *foundContent = 1;
            return;
        }
        else if (pos > 0) {
            // Split: [0,pos) goes to the prefix, the remainder stays in the node.
            char *first = DFSubstring(node->value,0,pos);
            char *rest = DFSubstring(node->value,pos,valueLen);
            if (result->len > 0)
                DFBufferFormat(result," ");
            char *quotedFirst = DFQuote(first);
            DFBufferFormat(result,"%s",quotedFirst);
            free(quotedFirst);
            DFSetNodeValue(node,rest);
            if (*foundSeq)
                *foundContent = 1;
            free(first);
            free(rest);
            return;
        }
        else {
            // pos == 0 with a non-empty value, which can only happen after the
            // sequence field: the text starts directly with real content.
            // Nothing is extracted, but the prefix ends here. Without setting
            // the flag the caller would keep scanning later siblings and could
            // strip leading punctuation out of the middle of the content.
            *foundContent = 1;
            return;
        }
    }

    int wasEmpty = (node->first == NULL);
    DFNode *next;
    for (DFNode *child = node->first; child != NULL; child = next) {
        // Fetch the sibling up front; the recursive call may remove `child`.
        next = child->next;
        if (*foundContent)
            break;
        extractPrefixRecursive(child,counterName,result,foundSeq,foundContent);
    }
    int isEmpty = (node->first == NULL);

    // Drop spans whose entire content was consumed into the prefix.
    if ((node->tag == HTML_SPAN) && isEmpty && !wasEmpty)
        DFRemoveNode(node);
}