Example #1
0
/*
**  Anchor-start callback.
**
**  Records the address of the parent of the anchor's main-link destination
**  in the Robot context attached to the request (the string is handed to
**  the list and is not freed here), then appends a new TextAnchor node for
**  `anchor' to the text object's anchor chain. Anchors that have a main
**  link get the next sequence number; all others get number 0.
**
**  Nothing happens when either argument is NULL.
*/
void HText_beginAnchor (HText * text, HTChildAnchor * anchor)
{
    Robot * robot;
    HTAnchor * target;
    char * address;
    TextAnchor * node;

    if (!text || !anchor) return;

    robot = (Robot *) HTRequest_context(text->request);
    target = HTAnchor_followMainLink((HTAnchor *) anchor);
    address = HTAnchor_address((HTAnchor *) HTAnchor_parent(target));

#if 0
    if (SHOW_MSG) HTTrace("Robot....... Found `%s\' \n", address ? address : "NULL");
#endif
    if (address != NULL) {
        HTList_addObject(robot->urilist, (void *) address);
        robot->count++;
    }

    node = (TextAnchor *) HT_MALLOC(sizeof(*node));
    if (node == NULL)
        HT_OUTOFMEM("HText_beginAnchor");

    /* Hook the new node onto the tail of the chain (or start the chain) */
    if (text->last_anchor)
        text->last_anchor->next = node;
    else
        text->first_anchor = node;
    text->last_anchor = node;

    node->next = 0;
    node->anchor = anchor;
    node->number = HTAnchor_followMainLink((HTAnchor *) anchor)
        ? ++(text->anchors)
        : 0;
}
Example #2
0
/*	Start an anchor field
**
**	Invoked as a link callback, so it is called for elements other than
**	anchors as well; anything that is not HTML_A is ignored. For an A
**	element a new TextAnchor is created starting at the current end of
**	the text, appended to the anchor chain and made the current anchor.
**	The anchor is numbered only if it has a main link; otherwise its
**	number is 0.
*/
PUBLIC void LMHText_beginAnchor (HText * text,
    int elem_num, int attr_num, HTChildAnchor * anc,
    const BOOL *present, const char **value)
{
    TextAnchor * node;

    if (elem_num != HTML_A) return;

    node = (TextAnchor *) HT_MALLOC(sizeof(*node));
    if (node == NULL)
        HT_OUTOFMEM("HText_beginAnchor");

    /* The anchor begins where the text currently ends */
    node->start = text->chars + text->last_line->size;
    node->extent = 0;
    node->next = 0;
    node->anchor = anc;

    /* Append to the chain and remember it as the open anchor */
    if (text->last_anchor)
        text->last_anchor->next = node;
    else
        text->first_anchor = node;
    text->last_anchor = node;
    text->current_anchor = node;

    node->number = HTAnchor_followMainLink((HTAnchor *) anc)
        ? ++(text->last_anchor_number)
        : 0;
}
Example #3
0
/*
**  Move sideways among the siblings of the anchor stored at index 1 of the
**  history list.
**
**  If that anchor is a child anchor, the sibling `offset' positions away in
**  its parent's list of children is looked up. When that sibling has a main
**  link destination, the last two history entries are replaced by the
**  sibling and its destination.
**
**  Returns the destination, or NULL when there is no such history entry,
**  the entry is a parent anchor, the offset leaves the list of children,
**  or the sibling has no main link.
*/
HTAnchor * HTHistory_moveBy
 ARGS1 (int,offset)
{
  HTAnchor * previous = HTList_objectAt (history, 1);
  HTList * siblings;
  HTAnchor * candidate;
  HTAnchor * target;

  if (! previous)
    return NULL;  /* No last visited node */

  if (previous == (HTAnchor *) previous->parent)
    return NULL;  /* Was a parent. FIXME we could possibly follow the next link... */

  /* Was a child: look among the other children of the same parent */
  siblings = previous->parent->children;
  candidate = HTList_objectAt (siblings,
                               HTList_indexOf (siblings, previous) - offset);
  if (! candidate) {
    if (TRACE) fprintf(stderr,
      		"HTHistory_moveBy: offset by %+d goes out of list %p.\n",
		offset, siblings);
    return NULL;
  }

  target = HTAnchor_followMainLink (candidate);
  if (target) {
    /* Swap the last (anchor, destination) pair for the new one */
    HTList_removeLastObject (history);
    HTList_removeLastObject (history);
    HTList_addObject (history, candidate);
    HTList_addObject (history, target);
  }
  return target;
}
Example #4
0
/*
**  Build a printable list of the references (source anchors) found in a
**  text object.
**
**  text    the text object whose anchors are listed
**  titles  if true, show the title of each destination's parent anchor
**          when it has one; otherwise (or when there is no title) show
**          the destination address
**
**  Returns a dynamically allocated string in every case, including the
**  "no references" message, so the caller can always release it with
**  HT_FREE. The string is built only with StrAllocCat, so no fixed-size
**  buffer limits the length of a title or address.
*/
char *Reference_List (HText *text, BOOL titles)
{
    char *output = NULL;	/* must start as NULL for StrAllocCat */
    int refs = HText_sourceAnchors(text);
    int cnt;

    if (refs <= 0) {
        StrAllocCat(output, "\n\nThere are no references from this document.\n\n");
        return output;
    }

    StrAllocCat(output, "\n*** References from this document ***\n");
    for (cnt = 1; cnt <= refs; cnt++) {
        HTAnchor *dest =
            HTAnchor_followMainLink((HTAnchor *)
                                    HText_childNumber(text, cnt));
        HTParentAnchor *parent = HTAnchor_parent(dest);
        char *address = HTAnchor_address(dest);
        const char *title = titles ? HTAnchor_title(parent) : NULL;
        const char *label = title ? title : address;
        char prefix[32];	/* "[%d] " of an int always fits */

        sprintf(prefix, "[%d] ", cnt);
        StrAllocCat(output, prefix);
        /* An anchor without destination has neither title nor address */
        if (label)
            StrAllocCat(output, label);
        StrAllocCat(output, "\n");
        HT_FREE(address);
    }
    return output;
}
Example #5
0
/*
**  Image callback: traces the address of the image the anchor links to.
**
**  The address string is obtained from the destination of the anchor's
**  main link and released again before returning. `alt', `align' and
**  `isMap' are not used. Nothing happens when `text' or `anchor' is NULL.
*/
void HText_appendImage (HText * text, HTChildAnchor * anchor,
			const char *alt, const char * align, BOOL isMap)
{
    if (text && anchor) {
        HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
	char * uri = HTAnchor_address(dest);
	if (SHOW_MSG) {
	    /* uri may be NULL when the anchor has no main link; never
	       hand a NULL pointer to a %s conversion */
	    HTTrace("Image %s", uri ? uri : "NULL");
	}
	HT_FREE(uri);
    }
}
Example #6
0
/*
**  Link callback: prints the address of every link that is reported.
**
**  Only `anchor' is used; the element/attribute numbers and the attribute
**  arrays are ignored. The address string is released before returning.
*/
PRIVATE void foundLink (HText * text,
			int element_number, int attribute_number,
			HTChildAnchor * anchor,
			const BOOL * present, const char ** value)
{
    if (anchor) {
	/*
	**  Find out which link we got. The anchor we are passed is
	**  a child anchor of the anchor we are currently parsing. We
	**  have to go from this child anchor to the actual destination.
	*/
	HTAnchor * dest = HTAnchor_followMainLink((HTAnchor *) anchor);
	char * address = HTAnchor_address(dest);

	/* A child anchor without a main link yields no address; do not
	   pass a NULL pointer to the %s conversion */
	HTPrint("Found link `%s\'\n", address ? address : "NULL");
	HT_FREE(address);
    }
}
Example #7
0
/*
**  Start-tag handler of the HTML structured stream.
**
**  me              the parser object
**  element_number  the HTML_xxx number of the element being opened
**  present         per-attribute flags, indexed by the attribute numbers
**                  of this element
**  value           per-attribute values, indexed the same way
**
**  On the first call the text object is told that the document begins.
**  For elements carrying a URI attribute a child anchor of the document
**  anchor is created and linked to that URI, and the link is reported
**  through HTextImp_foundLink. BASE, ISINDEX, LINK, META, PRE and TITLE
**  additionally update the document anchor or parser state. Non-empty
**  elements are then pushed on the parse stack, and finally the element
**  is handed to HTextImp_beginElement.
**
**  If the parse stack is full the overflow counter is incremented and the
**  function returns without calling HTextImp_beginElement.
*/
PRIVATE void HTML_start_element (HTStructured *	me,
				 int		element_number,
				 const BOOL * 	present,
				 const char **	value)
{
    HTChildAnchor * address = NULL;
    if (!me->started) {
	HTextImp_build(me->text, HTEXT_BEGIN);
	me->started = YES;
    }

    /* Look at what element was started */
    switch (element_number) {
    case HTML_A:
	if (present[HTML_A_HREF] && value[HTML_A_HREF]) {
	    address = HTAnchor_findChildAndLink(
		me->node_anchor,					/* parent */
		present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,	/* Tag */
		value[HTML_A_HREF],					/* Addresss */
		present[HTML_A_REL] && value[HTML_A_REL] ? 
		(HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
	    
	    /* A TITLE on the link only fills in a destination that has
	       no title of its own yet */
	    if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
		HTLink * link = HTAnchor_mainLink((HTAnchor *) address);
		HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
		if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
	    }
	    HTextImp_foundLink(me->text, element_number, HTML_A_HREF,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Anchor `%s\'\n" _ value[HTML_A_HREF]);
	}
	break;

    case HTML_AREA:
	if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_AREA_HREF], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]);
	}
	break;

    case HTML_BASE:
	if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) {
	    HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]);
	    HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]);
	}
	break;

    case HTML_BODY:
	if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_BODY_BACKGROUND], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]);
	}
	break;

    case HTML_FORM:
	if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_FORM_ACTION], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION,
			       address, present, value);
	}
	break;

    case HTML_FRAME:
	if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_FRAME_SRC], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC,
			       address, present, value);
	    HTTRACE(SGML_TRACE, "HTML Parser. Frame `%s\'\n" _ value[HTML_FRAME_SRC]);
	}
	break;
	
    case HTML_INPUT:
	if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_INPUT_SRC], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC,
			       address, present, value);
	}
	break;

    case HTML_IMG:
	if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_IMG_SRC], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC,
			       address, present, value);
	}
	break;

    case HTML_ISINDEX:
   	HTAnchor_setIndex(me->node_anchor);
	break;
	
    case HTML_LINK:
	if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
	    HTParentAnchor * dest = NULL;

	    /*
	    **  `present' and `value' are indexed by the attribute numbers
	    **  of the element being opened, so the address must be read
	    **  through HTML_LINK_HREF (the attribute tested above), not
	    **  through the HTML_A_xxx indices of the A element. The link
	    **  is registered without a tag, like the other non-A elements
	    **  handled in this switch.
	    */
	    address = HTAnchor_findChildAndLink(
		me->node_anchor,					/* parent */
		NULL,							/* Tag */
		value[HTML_LINK_HREF],					/* Addresss */
		NULL);							/* Rels */
	    dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address));

	    /* If forward reference: one link per token, document -> dest */
	    if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
		char * strval = NULL;
		char * ptr = NULL;
		char * relation = NULL;
		StrAllocCopy(strval, value[HTML_LINK_REL]);
		ptr = strval;
		while ((relation = HTNextLWSToken(&ptr)) != NULL) {
		    HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
			       (HTLinkType) HTAtom_caseFor(relation),
			       METHOD_INVALID);
		}
		HT_FREE(strval);
	    }

	    /* If reverse reference: one link per token, dest -> document */
	    if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
		char * strval = NULL;
		char * ptr = NULL;
		char * relation = NULL;
		StrAllocCopy(strval, value[HTML_LINK_REV]);
		ptr = strval;
		while ((relation = HTNextLWSToken(&ptr)) != NULL) {
		    HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
			       (HTLinkType) HTAtom_caseFor(relation),
			       METHOD_INVALID);
		}
		HT_FREE(strval);
	    }

	    /* If we got any type information as well, use it only when the
	       destination format is still unknown */
	    if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
		if (HTAnchor_format(dest) == WWW_UNKNOWN)
		    HTAnchor_setFormat(dest,
				       (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
	    }

	    /* Call out to the layout engine */
	    HTextImp_foundLink(me->text, element_number, HTML_LINK_HREF,
			       address, present, value);
	}
	break;

    case HTML_META:
	/* META without CONTENT is recorded with an empty value */
	if (present[HTML_META_NAME] && value[HTML_META_NAME]) {
	    HTAnchor_addMeta (me->node_anchor,
			      value[HTML_META_NAME],
			      (present[HTML_META_CONTENT] && value[HTML_META_CONTENT]) ?
			      value[HTML_META_CONTENT] : "");
	}
	break;

    case HTML_OBJECT:
	/* Each URI-valued attribute of OBJECT is reported as its own link */
	if (present[HTML_OBJECT_CLASSID] && value[HTML_OBJECT_CLASSID]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_OBJECT_CLASSID], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CLASSID,
			       address, present, value);
	}

	if (present[HTML_OBJECT_CODEBASE] && value[HTML_OBJECT_CODEBASE]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_OBJECT_CODEBASE], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_CODEBASE,
			       address, present, value);
	}

	if (present[HTML_OBJECT_DATA] && value[HTML_OBJECT_DATA]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_OBJECT_DATA], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_DATA,
			       address, present, value);
	}

	if (present[HTML_OBJECT_ARCHIVE] && value[HTML_OBJECT_ARCHIVE]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_OBJECT_ARCHIVE], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_ARCHIVE,
			       address, present, value);
	}

	if (present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP]) {
	    address = HTAnchor_findChildAndLink(me->node_anchor, NULL,
						value[HTML_OBJECT_USEMAP], NULL);
	    HTextImp_foundLink(me->text, element_number, HTML_OBJECT_USEMAP,
			       address, present, value);
	}
	break;

    case HTML_PRE:
    	if (me->comment_end)
	    HTextImp_addText(me->text, me->comment_end, strlen(me->comment_end));
	break;

    case HTML_TITLE:
	/* Start collecting a fresh title */
        HTChunk_truncate(me->title,0);
	break;

    default:
	/* No link or anchor bookkeeping for any other element */
	break;
    }

    /* Update our parse stack. The stack pointer is decremented on every
       push, so sp == stack means that no free slot is left. */
    if (SGML_findTagContents(me->dtd, element_number) != SGML_EMPTY) {
        if (me->sp == me->stack) {
	    HTTRACE(SGML_TRACE, "HTML Parser. Maximum nesting of %d exceded!\n" _ MAX_NESTING); 
	    me->overflow++;
	    return;
	}
    	--(me->sp);
	me->sp[0] = element_number;
    }	

    /* Call out to the layout engine */
    HTextImp_beginElement(me->text, element_number, present, value);
}