/*
**	Robot hook called when an anchor starts.
**
**	Records the address of the anchor's destination document in the
**	robot's URI list and appends a new TextAnchor entry to the anchor
**	chain kept in the text object. Nothing happens when either `text'
**	or `anchor' is NULL.
*/
void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
{
    Robot * robot;
    HTAnchor * dest;
    HTParentAnchor * dest_parent;
    char * uri;
    TextAnchor * node;

    if (!text || !anchor) return;

    /* Resolve the destination document and obtain its address */
    robot = (Robot *) HTRequest_context(text->request);
    dest = HTAnchor_followMainLink((HTAnchor *) anchor);
    dest_parent = HTAnchor_parent(dest);
    uri = HTAnchor_address((HTAnchor *) dest_parent);

#if 0
    if (SHOW_MSG)
        HTTrace("Robot....... Found `%s\' \n", uri ? uri : "NULL");
#endif

    /* The string is not freed here: the pointer itself goes into the list */
    if (uri) {
        HTList_addObject(robot->urilist, (void *) uri);
        robot->count++;
    }

    node = (TextAnchor *) HT_MALLOC(sizeof(*node));
    if (node == NULL) {
        HT_OUTOFMEM("HText_beginAnchor");
    }
    node->next = 0;
    node->anchor = anchor;

    /* Hook the new entry onto the tail of the chain (or start the chain) */
    if (text->last_anchor)
        text->last_anchor->next = node;
    else
        text->first_anchor = node;
    text->last_anchor = node;

    /* Only anchors that have a main link get a running number; others get 0 */
    node->number = HTAnchor_followMainLink((HTAnchor *) anchor)
        ? ++(text->anchors)
        : 0;
}
/*	Start an anchor field
**
**	Creates a TextAnchor whose start offset is the current end of the
**	text (characters so far plus the size of the last line), appends it
**	to the text object's anchor chain and makes it the current anchor.
**	`attr_num', `present' and `value' are not used by this function.
*/
PUBLIC void LMHText_beginAnchor (HText * text,
                                 int elem_num, int attr_num,
                                 HTChildAnchor * anc,
                                 const BOOL *present, const char **value)
{
    TextAnchor * entry;

    /* This is invoked as a link callback, so only react to <A> elements */
    if (elem_num != HTML_A) return;

    entry = (TextAnchor *) HT_MALLOC(sizeof(*entry));
    if (entry == NULL) {
        HT_OUTOFMEM("HText_beginAnchor");
    }

    entry->anchor = anc;
    entry->next = 0;
    entry->start = text->chars + text->last_line->size;
    entry->extent = 0;

    /* Append to the tail of the chain, or start the chain if it is empty */
    if (text->last_anchor)
        text->last_anchor->next = entry;
    else
        text->first_anchor = entry;
    text->last_anchor = entry;
    text->current_anchor = entry;

    /* Anchors with a main link are numbered consecutively; others get 0 */
    entry->number = HTAnchor_followMainLink((HTAnchor*)anc)
        ? ++(text->last_anchor_number)
        : 0;
}
/*
**	Move sideways in the history, relative to the last visited anchor.
**
**	Looks up the entry at position 1 of the history list. If that entry
**	is a child anchor, the sibling found `offset' positions before it in
**	its parent's list of children is selected (index of the entry minus
**	`offset'), and that sibling's main link is followed.
**
**	On success the last two history entries are replaced by the selected
**	sibling and its destination.
**
**	Returns the destination anchor, or NULL when there is no such history
**	entry, the entry is a parent anchor, the offset leads outside the
**	children list, or the selected sibling has no main link (the history
**	is left untouched in all those cases).
*/
HTAnchor * HTHistory_moveBy ARGS1 (int,offset)
{
    HTAnchor * last = HTList_objectAt (history, 1);
    HTList * kids;
    HTAnchor * nextOne;
    HTAnchor * destination;

    if (! last)
        return NULL;			/* No last visited node */

    if (last == (HTAnchor *) last->parent) {
        /* Was a parent */
        return NULL;	/* FIXME we could possibly follow the next link... */
    }

    /* Was a child: pick the sibling `offset' positions away */
    kids = last->parent->children;
    nextOne = HTList_objectAt (kids, HTList_indexOf (kids, last) - offset);
    if (! nextOne) {
        /* %p requires a void pointer argument, hence the cast */
        if (TRACE) fprintf(stderr,
                           "HTHistory_moveBy: offset by %+d goes out of list %p.\n",
                           offset, (void *) kids);
        return NULL;
    }

    destination = HTAnchor_followMainLink (nextOne);
    if (destination) {
        /* Swap the two most recent entries for the new source/destination */
        HTList_removeLastObject (history);
        HTList_removeLastObject (history);
        HTList_addObject (history, nextOne);
        HTList_addObject (history, destination);
    }
    return destination;
}
/*
**	Build a printable list of the references made from a document.
**
**	text	the document whose source anchors are listed
**	titles	if true, show the destination's title when it has one;
**		otherwise (or when there is no title) show its address
**
**	Returns a newly allocated string in every case, both for the
**	"no references" message and for the actual list. The caller owns
**	the string and releases it with HT_FREE.
*/
char *Reference_List (HText *text, BOOL titles)
{
    /* Must start out NULL: the StrAlloc macros grow the string from there */
    char * output = NULL;
    int refs = HText_sourceAnchors(text);
    int cnt;

    if (refs <= 0) {
        StrAllocCopy(output,
                     "\n\nThere are no references from this document.\n\n");
        return output;
    }

    StrAllocCopy(output, "\n*** References from this document ***\n");
    for (cnt = 1; cnt <= refs; cnt++) {
        HTAnchor * dest =
            HTAnchor_followMainLink((HTAnchor *) HText_childNumber(text, cnt));
        HTParentAnchor * parent = HTAnchor_parent(dest);
        char * address = HTAnchor_address(dest);
        const char * title = titles ? HTAnchor_title(parent) : NULL;
        const char * entry = title ? title : address;
        char label[32];		/* "[%d] " of any int fits comfortably */

        sprintf(label, "[%d] ", cnt);
        StrAllocCat(output, label);

        /*
        ** Append the entry directly instead of formatting it through a
        ** fixed-size buffer, so titles and addresses of any length are
        ** safe. A missing entry yields an empty line.
        */
        if (entry) StrAllocCat(output, entry);
        StrAllocCat(output, "\n");

        HT_FREE(address);
    }
    return output;
}
/*
**	Called when an image is found in the document.
**
**	Resolves the address of the image anchor's destination and, when
**	SHOW_MSG is set, traces it. `alt', `align' and `isMap' are not used
**	here. Nothing happens when `text' or `anchor' is NULL.
*/
void HText_appendImage (HText * text, HTChildAnchor * anchor,
                        const char *alt, const char * align, BOOL isMap)
{
    HTAnchor * dest;
    char * uri;

    if (!text || !anchor) return;

    dest = HTAnchor_followMainLink((HTAnchor *) anchor);
    uri = HTAnchor_address(dest);
    if (SHOW_MSG) {
        /* Never hand a NULL pointer to %s */
        HTTrace("Image %s", uri ? uri : "NULL");
    }
    HT_FREE(uri);
}
/*
**	Link callback: print the address of every link found.
**
**	Only `anchor' is used; the remaining parameters are ignored here.
**	Nothing is printed when `anchor' is NULL.
*/
PRIVATE void foundLink (HText * text,
                        int element_number, int attribute_number,
                        HTChildAnchor * anchor,
                        const BOOL * present, const char ** value)
{
    HTAnchor * dest;
    char * address;

    if (!anchor) return;

    /*
    **  Find out which link we got. The anchor we are passed is
    **  a child anchor of the anchor we are current parsing. We
    **  have to go from this child anchor to the actual destination.
    */
    dest = HTAnchor_followMainLink((HTAnchor *) anchor);
    address = HTAnchor_address(dest);

    /* Never hand a NULL pointer to %s */
    HTPrint("Found link `%s\'\n", address ? address : "NULL");
    HT_FREE(address);
}
/*
**	Handle the start tag of an HTML element.
**
**	me		the structured stream (parser state)
**	element_number	which element was started (HTML_xxx)
**	present		per-attribute flags: was the attribute given?
**	value		per-attribute values, indexed like `present'
**
**	On the first call the text object is told that the document begins.
**	For elements carrying a URI attribute, a child anchor linked to that
**	URI is created under the document's anchor and reported through
**	HTextImp_foundLink. A few elements update the document anchor itself
**	(BASE, ISINDEX, META, LINK relations). Non-empty elements are then
**	pushed on the parse stack and finally the layout engine is notified
**	through HTextImp_beginElement.
**
**	When the parse stack is full, the overflow counter is incremented
**	and the function returns WITHOUT notifying the layout engine.
*/
PRIVATE void HTML_start_element (HTStructured * me,
                                 int element_number,
                                 const BOOL * present,
                                 const char ** value)
{
    HTChildAnchor * address = NULL;

    if (!me->started) {
        HTextImp_build(me->text, HTEXT_BEGIN);
        me->started = YES;
    }

    /* Look at what element was started */
    switch (element_number) {
    case HTML_A:
        if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
            address = HTAnchor_findChildAndLink(
                me->node_anchor,                                   /* parent */
                present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,  /* Tag */
                value[HTML_A_HREF],                                /* Address */
                present[HTML_A_REL] && value[HTML_A_REL] ?
                (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);

            /* Give the destination a title, unless it already has one */
            if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
                HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
                HTParentAnchor * dest =
                    HTAnchor_parent(HTLink_destination(link));
                if (!HTAnchor_title(dest))
                    HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
            }
            HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]);
        }
        break;

    case HTML_AREA:
        if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_AREA_HREF], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]);
        }
        break;

    case HTML_BASE:
        if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
            HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
            HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]);
        }
        break;

    case HTML_BODY:
        if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_BODY_BACKGROUND], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]);
        }
        break;

    case HTML_FORM:
        if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_FORM_ACTION], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
                               address, present, value);
        }
        break;

    case HTML_FRAME:
        if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_FRAME_SRC], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
                               address, present, value);
            HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]);
        }
        break;

    case HTML_INPUT:
        if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_INPUT_SRC], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
                               address, present, value);
        }
        break;

    case HTML_IMG:
        if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_IMG_SRC], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
                               address, present, value);
        }
        break;

    case HTML_ISINDEX:
        HTAnchor_setIndex(me->node_anchor);
        break;

    case HTML_LINK:
        if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
            HTParentAnchor * dest = NULL;

            /*
            ** Use LINK's own HREF slot, which the guard above has just
            ** validated. The HTML_A_xxx indices belong to the A element
            ** and must not be used to index a LINK's attribute arrays.
            ** No tag is passed, as for the other non-A elements.
            */
            address = HTAnchor_findChildAndLink(
                me->node_anchor,            /* parent */
                NULL,                       /* Tag */
                value[HTML_LINK_HREF],      /* Address */
                NULL);                      /* Rels */
            dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));

            /* If forward reference */
            if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
                char * strval = NULL;
                char * ptr = NULL;
                char * relation = NULL;
                /* Tokenise a private copy; one link per relation token */
                StrAllocCopy(strval, value[HTML_LINK_REL]);
                ptr = strval;
                while ((relation = HTNextLWSToken(&ptr)) != NULL) {
                    HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
                               (HTLinkType) HTAtom_caseFor(relation),
                               METHOD_INVALID);
                }
                HT_FREE(strval);
            }

            /* If reverse reference */
            if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
                char * strval = NULL;
                char * ptr = NULL;
                char * relation = NULL;
                /* Same as above, with source and destination swapped */
                StrAllocCopy(strval, value[HTML_LINK_REV]);
                ptr = strval;
                while ((relation = HTNextLWSToken(&ptr)) != NULL) {
                    HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
                               (HTLinkType) HTAtom_caseFor(relation),
                               METHOD_INVALID);
                }
                HT_FREE(strval);
            }

            /* If we got any type information as well */
            if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
                if (HTAnchor_format(dest) == WWW_UNKNOWN)
                    HTAnchor_setFormat(dest,
                                       (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
            }

            /* Call out to the layout engine */
            HTextImp_foundLink(me->text, element_number, HTML_LINK_HREF,
                               address, present, value);
        }
        break;

    case HTML_META:
        if (present[HTML_META_NAME] && value[HTML_META_NAME]) {
            /* A missing CONTENT attribute is recorded as an empty string */
            HTAnchor_addMeta (me->node_anchor,
                              value[HTML_META_NAME],
                              (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ?
                              value[HTML_META_CONTENT] : "");
        }
        break;

    case HTML_OBJECT:
        /* Every URI-valued attribute that is given is reported as a link */
        if (present[HTML_OBJECT_CLASSID] && value[HTML_OBJECT_CLASSID]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_OBJECT_CLASSID], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CLASSID,
                               address, present, value);
        }
        if (present[HTML_OBJECT_CODEBASE] && value[HTML_OBJECT_CODEBASE]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_OBJECT_CODEBASE], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CODEBASE,
                               address, present, value);
        }
        if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_OBJECT_DATA], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_OBJECT_DATA,
                               address, present, value);
        }
        if (present[HTML_OBJECT_ARCHIVE] && value[HTML_OBJECT_ARCHIVE]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_OBJECT_ARCHIVE], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_OBJECT_ARCHIVE,
                               address, present, value);
        }
        if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP]) {
            address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
                                                value[HTML_OBJECT_USEMAP], NULL);
            HTextImp_foundLink(me->text, element_number, HTML_OBJECT_USEMAP,
                               address, present, value);
        }
        break;

    case HTML_PRE:
        if (me->comment_end)
            HTextImp_addText(me->text, me->comment_end, strlen(me->comment_end));
        break;

    case HTML_TITLE:
        HTChunk_truncate(me->title,0);
        break;

    default:
        /* No parser-level processing for any other element */
        break;
    }

    /* Update our parse stack (it fills from the top down towards `stack') */
    if (SGML_findTagContents(me->dtd, element_number) != SGML_EMPTY) {
        if (me->sp == me->stack) {
            HTTRACE(SGML_TRACE, "HTML Parser. Maximum nesting of %d exceded!\n" _ MAX_NESTING);
            me->overflow++;
            return;
        }
        --(me->sp);
        me->sp[0] = element_number;
    }

    /* Call out to the layout engine */
    HTextImp_beginElement(me->text, element_number, present, value);
}