/**
 * Notes the positions used by this repeat and by every copy of its
 * sub-component.
 *
 * Only the first copy is built through the sub-component's own
 * notePositions(), first() and last(). Positions for the remaining copies are
 * requested from the builder in one makePositions() call, and their firsts
 * and lasts are copies of the first copy's, passed through addBase() with a
 * base of (copy index * positions per copy).
 *
 * Throws ParseError("Bounded repeat is too large.") when the total number of
 * positions over all copies would exceed MAX_POSITIONS_EXPANDED. When more
 * than one copy is required, checkPositions() is also run over the first
 * copy's firsts and lasts and may throw.
 */
void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
    assert(m_max > 0);
    assert(m_max == NoLimit || m_max < MAX_MAX_BOUND);

    /* Note: We can construct smaller subgraphs if we're not maintaining edge
     * priorities. */

    // We create one copy only through a recursive call to notePositions(),
    // first() and last(). Then we clone its positions and store the
    // appropriate firsts and lasts values for the copies.
    posFirst = bs.getBuilder().numVertices();
    sub_comp->notePositions(bs);

    // A bounded repeat needs m_max copies; an unbounded one needs m_min
    // copies, but never fewer than one.
    u32 copies = m_max < NoLimit ? m_max : MAX(m_min, 1);
    DEBUG_PRINTF("building %u copies of repeated region\n", copies);
    m_firsts.clear();
    m_lasts.clear();
    m_firsts.resize(copies);
    m_lasts.resize(copies);

    m_firsts[0] = sub_comp->first();
    m_lasts[0] = sub_comp->last();

    postSubNotePositionHook();

    posLast = bs.getBuilder().numVertices() - 1;
    u32 vcount = posLast + 1 - posFirst;

    // If we're making more than one copy, then our firsts and lasts must only
    // contain vertices inside [posFirst, posLast]: anything else means we have
    // an embedded anchor or otherwise weird situation.
    if (copies > 1) {
        checkPositions(m_firsts[0], bs);
        checkPositions(m_lasts[0], bs);
    }

    // Avoid enormous expansions. The product is formed in 64 bits: a 32-bit
    // multiplication could wrap around and let an oversized repeat slip past
    // this limit.
    if (static_cast<unsigned long long>(vcount) * copies >
        MAX_POSITIONS_EXPANDED) {
        throw ParseError("Bounded repeat is too large.");
    }

    // Add positions for the rest of the copies. The check above bounds
    // vcount * copies, so this smaller product cannot overflow.
    size_t copyPositions = vcount * (copies - 1);
    bs.getBuilder().makePositions(copyPositions);

    // Calculate our firsts and lasts for the copies
    for (u32 i = 1; i < copies; ++i) {
        m_firsts[i] = m_firsts[0];
        m_lasts[i] = m_lasts[0];
        u32 base = i * vcount;
        addBase(base, m_firsts[i], m_lasts[i]);
    }

    recordPosBounds(posFirst, bs.getBuilder().numVertices());
    precalc_firsts(); /* ComponentRepeat requires firsts to be calculated ahead
                       * of time and cached due to expense */
}
/**
 * Builds the follow set for the repeat: the sub-component's own follow set is
 * built first, then cloned once per additional copy, and finally the copies
 * are wired together by wireRepeats().
 *
 * Does nothing when the repeat's upper bound is zero.
 */
void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs,
                                     const vector<PositionInfo> &lastPos) {
    if (m_max == 0) {
        return;
    }

    DEBUG_PRINTF("enter\n");

    // The first copy is the one actually built by the sub-component.
    DEBUG_PRINTF("initial repeat\n");
    sub_comp->buildFollowSet(bs, lastPos);

    // Every further copy is a clone of [posFirst, posLast], shifted by a
    // whole number of region widths.
    const u32 numCopies = m_firsts.size();
    DEBUG_PRINTF("cloning %u copies of repeat\n", numCopies - 1);

    const u32 regionWidth = posLast + 1 - posFirst;
    for (u32 idx = 1; idx < numCopies; ++idx) {
        const u32 shift = regionWidth * idx;
        if (shift == 0) {
            // Nothing to clone for a zero offset.
            continue;
        }
        bs.cloneFollowSet(posFirst, posLast, shift);
    }

    wireRepeats(bs, lastPos);

    DEBUG_PRINTF("leave\n");
}
/**
 * Scans the given positions and throws ParseError("Embedded anchors not
 * supported.") at the first one the builder reports as a special state.
 * Returns normally when no such position is found.
 */
static
void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {
    const NFABuilder &nfa = bs.getBuilder();
    for (auto it = v.cbegin(); it != v.cend(); ++it) {
        if (!nfa.isSpecialState(it->pos)) {
            continue;
        }
        throw ParseError("Embedded anchors not supported.");
    }
}
/**
 * Requests a single position from the builder for this character class,
 * attaches the class's reach (cr) to it, sets its report ID offset adjustment
 * to zero and records the half-open bounds [position, position + 1).
 */
void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
    // Positions may only be noted once the class has been finalized.
    assert(finalized);

    NFABuilder &nfa = bs.getBuilder();

    // One position is all this component needs.
    position = nfa.makePositions(1);

    nfa.addCharReach(position, cr);
    nfa.setNodeReportID(position, 0 /* offset adj */);

    recordPosBounds(position, position + 1);
}
/**
 * Connects the copies of the repeated sub-component to one another.
 *
 * Mandatory copies (indices below m_min) are chained in sequence. Each
 * optional copy is entered from the lasts of the copy before it and, except
 * for the copy at index m_min, also from the lasts of the final mandatory
 * copy. When the repeat has no upper bound, the last copy is additionally
 * connected back to itself.
 */
void ComponentRepeat::wireRepeats(GlushkovBuildState &bs,
                                  const vector<PositionInfo> &lastPos) {
    /* note: m_lasts[0] already valid */
    const u32 numCopies = m_firsts.size();
    const bool subIsEmpty = sub_comp->empty();

    // Lasts of the final mandatory copy; with no mandatory copies the
    // caller-supplied lastPos is used in its place.
    const vector<PositionInfo> &mandLasts =
        m_min ? m_lasts[m_min - 1] : lastPos;

    if (numCopies != 0) {
        DEBUG_PRINTF("wiring up %u mand repeats\n", m_min);
        for (u32 idx = 1; idx < m_min; ++idx) {
            vector<PositionInfo> &prevLasts = m_lasts[idx - 1];
            bs.connectRegions(prevLasts, m_firsts[idx]);

            // If the sub-component is empty, this copy's lasts also take on
            // the previous copy's lasts.
            if (subIsEmpty) {
                vector<PositionInfo> &curLasts = m_lasts[idx];
                curLasts.insert(curLasts.end(), prevLasts.begin(),
                                prevLasts.end());
            }
        }

        DEBUG_PRINTF("wiring up %d optional repeats\n", numCopies - m_min);
        for (u32 idx = MAX(m_min, 1); idx < numCopies; ++idx) {
            vector<PositionInfo> sources = m_lasts[idx - 1];

            const bool addMandLasts = m_min && idx != m_min;
            if (addMandLasts) {
                sources.insert(sources.end(), mandLasts.begin(),
                               mandLasts.end());
                // Sort so that unique() can strip the duplicates.
                sort(sources.begin(), sources.end());
                sources.erase(unique(sources.begin(), sources.end()),
                              sources.end());
            }

            bs.connectRegions(sources, m_firsts[idx]);
        }
    }

    // If we have no max bound, we need a self-loop as well.
    if (m_max == NoLimit) {
        DEBUG_PRINTF("final repeat self-loop\n");
        bs.connectRegions(m_lasts.back(), m_firsts.back());
    }
}