DocTocItem *MobiEngineImpl::GetTocTree()
{
    if (!tocReparsePoint)
        return NULL;

    EbookTocItem *root = NULL;
    ScopedMem<WCHAR> itemText;
    ScopedMem<WCHAR> itemLink;
    int itemLevel = 0;
    int idCounter = 0;

    // there doesn't seem to be a standard for Mobi ToCs, so we try to
    // determine the author's intentions by looking at commonly used tags
    HtmlPullParser parser(tocReparsePoint, str::Len(tocReparsePoint));
    HtmlToken *tok;
    while ((tok = parser.Next()) && !tok->IsError()) {
        if (itemLink && tok->IsText()) {
            ScopedMem<WCHAR> linkText(str::conv::FromHtmlUtf8(tok->s, tok->sLen));
            if (itemText)
                itemText.Set(str::Join(itemText, L" ", linkText));
            else
                itemText.Set(linkText.StealData());
        }
        else if (!tok->IsTag())
            continue;
        else if (Tag_Mbp_Pagebreak == tok->tag)
            break;
        else if (!itemLink && tok->IsStartTag() && Tag_A == tok->tag) {
            AttrInfo *attr = tok->GetAttrByName("filepos");
            if (!attr)
                attr = tok->GetAttrByName("href");
            if (attr)
                itemLink.Set(str::conv::FromHtmlUtf8(attr->val, attr->valLen));
        }
        else if (itemLink && tok->IsEndTag() && Tag_A == tok->tag) {
            PageDestination *dest = NULL;
            if (!itemText) {
                itemLink.Set(NULL);
                continue;
            }
            if (IsExternalUrl(itemLink))
                dest = new SimpleDest2(0, RectD(), itemLink.StealData());
            else
                dest = GetNamedDest(itemLink);
            EbookTocItem *item = new EbookTocItem(itemText.StealData(), dest);
            item->id = ++idCounter;
            item->open = itemLevel <= 2;
            AppendTocItem(root, item, itemLevel);
            itemLink.Set(NULL);
        }
        else if (Tag_Blockquote == tok->tag || Tag_Ul == tok->tag || Tag_Ol == tok->tag) {
            if (tok->IsStartTag())
                itemLevel++;
            else if (tok->IsEndTag() && itemLevel > 0)
                itemLevel--;
        }
    }

    return root;
}
static void Test02() {
    const char* s = "<p>Last paragraph";
    HtmlPullParser parser(s, str::Len(s));
    HtmlToken* t = parser.Next();
    utassert(t && t->IsTag() && t->IsStartTag() && Tag_P == t->tag);
    t = parser.Next();
    utassert(t && t->IsText() && str::EqNIx(t->s, t->sLen, "Last paragraph"));
}
// Parse s in place i.e. we assume we can modify it. Must be 0-terminated.
// The caller owns the memory for s.
HtmlElement *HtmlParser::ParseInPlace(char *s, UINT codepage)
{
    if (this->html)
        Reset();
    this->html = s;
    this->codepage = codepage;

    HtmlPullParser parser(s, strlen(s));
    HtmlToken *tok;

    while ((tok = parser.Next())) {
        char *tag = (char *)tok->s;
        if (tok->IsError()) {
            errorContext = tag;
            switch (tok->error) {
                case HtmlToken::UnclosedTag: return ParseError(ErrParsingElementName);
                case HtmlToken::InvalidTag:  return ParseError(ErrParsingClosingElement);
                default:                     return ParseError(ErrParsingElement);
            }
        }
        if (!tok->IsTag()) {
            // ignore text content
            assert(tok->IsText());
            continue;
        }
        char *tagEnd = tag + tok->nLen;
        if (!tok->IsEndTag()) {
            // note: call tok->NextAttr() before zero-terminating names and values
            AttrInfo *attr = tok->NextAttr();
            *tagEnd = '\0';
            StartTag(tag);

            while (attr) {
                char *name = (char *)attr->name;
                char *nameEnd = name + attr->nameLen;
                char *value = (char *)attr->val;
                char *valueEnd = value + attr->valLen;
                attr = tok->NextAttr();

                *nameEnd = *valueEnd = '\0';
                AppendAttr(name, value);
            }
        }
        if (!tok->IsStartTag() || IsTagSelfClosing(tok->tag)) {
            *tagEnd = '\0';
            CloseTag(tag);
        }
    }

    return rootElement;
}
// Return the next parsed page. Returns NULL if finished parsing.
// For simplicity of implementation, we parse xml text node or
// xml element at a time. This might cause a creation of one
// or more pages, which we remeber and send to the caller
// if we detect accumulated pages.
HtmlPage *HtmlFormatter::Next(bool skipEmptyPages)
{
    for (;;)
    {
        // send out all pages accumulated so far
        while (pagesToSend.Count() > 0) {
            HtmlPage *ret = pagesToSend.At(0);
            pagesToSend.RemoveAt(0);
            pageCount++;
            if (skipEmptyPages && IsEmptyPage(ret))
                delete ret;
            else
                return ret;
        }
        // we can call ourselves recursively to send outstanding
        // pages after parsing has finished so this is to detect
        // that case and really end parsing
        if (finishedParsing)
            return NULL;
        HtmlToken *t = htmlParser->Next();
        if (!t || t->IsError())
            break;

        currReparseIdx = t->GetReparsePoint() - htmlParser->Start();
        CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser));
        if (t->IsTag())
            HandleHtmlTag(t);
        else if (!IgnoreText())
            HandleText(t);
    }
    // force layout of the last line
    AutoCloseTags(tagNesting.Count());
    FlushCurrLine(true);

    UpdateLinkBboxes(currPage);
    pagesToSend.Append(currPage);
    currPage = NULL;
    // call ourselves recursively to return accumulated pages
    finishedParsing = true;
    return Next();
}