void
bdb_cache_return_entry_rw( struct bdb_info *bdb, Entry *e,
	int rw, DB_LOCK *lock )
{
	EntryInfo *ei;
	int free = 0;

	ei = e->e_private;
	if ( ei && ( ei->bei_state & CACHE_ENTRY_NOT_CACHED )) {
		bdb_cache_entryinfo_lock( ei );
		if ( ei->bei_state & CACHE_ENTRY_NOT_CACHED ) {
			/* Releasing the entry can only be done when
			 * we know that nobody else is using it, i.e. we
			 * should have an entry_db writelock. But the
			 * flag is only set by the thread that loads the
			 * entry, and only if no other thread has found
			 * it while it was working. All other threads
			 * clear the flag, which means that we should be
			 * the only thread using the entry if the flag
			 * is set here.
			 */
			ei->bei_e = NULL;
			ei->bei_state ^= CACHE_ENTRY_NOT_CACHED;
			free = 1;
		}
		bdb_cache_entryinfo_unlock( ei );
	}
	bdb_cache_entry_db_unlock( bdb, lock );
	if ( free ) {
		e->e_private = NULL;
		bdb_entry_return( e );
	}
}
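/* Illustrative sketch, not part of back-bdb: the function above peeks at
 * bei_state without the entryinfo mutex and then re-checks the flag after
 * taking it. A minimal, self-contained example of that check/lock/re-check
 * pattern, using hypothetical names and plain pthreads:
 */
#if 0
#include <pthread.h>

struct flagged_obj {
	pthread_mutex_t	mutex;
	unsigned	state;
};
#define FLAG_UNCACHED	0x1

/* Clear FLAG_UNCACHED; return nonzero only if this thread cleared it. */
static int
clear_if_uncached( struct flagged_obj *obj )
{
	int cleared = 0;

	if ( obj->state & FLAG_UNCACHED ) {		/* cheap unlocked peek */
		pthread_mutex_lock( &obj->mutex );
		if ( obj->state & FLAG_UNCACHED ) {	/* re-check under the mutex */
			obj->state &= ~FLAG_UNCACHED;
			cleared = 1;
		}
		pthread_mutex_unlock( &obj->mutex );
	}
	return cleared;
}
#endif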
int
bdb_cache_find_id(
	Operation *op,
	DB_TXN	*tid,
	ID	id,
	EntryInfo **eip,
	int	flag,
	DB_LOCK	*lock )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	Entry	*ep = NULL;
	int	rc = 0, load = 0;
	EntryInfo ei = { 0 };

	ei.bei_id = id;

#ifdef SLAP_ZONE_ALLOC
	slap_zh_rlock(bdb->bi_cache.c_zctx);
#endif

	/* If we weren't given any info, see if we have it already cached */
	if ( !*eip ) {
again:	ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock );
		*eip = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
			(caddr_t) &ei, bdb_id_cmp );
		if ( *eip ) {
			/* If the lock attempt fails, the info is in use */
			if ( bdb_cache_entryinfo_trylock( *eip )) {
				int del = (*eip)->bei_state & CACHE_ENTRY_DELETED;
				ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
				/* If this node is being deleted, treat
				 * as if the delete has already finished
				 */
				if ( del ) {
					return DB_NOTFOUND;
				}
				/* otherwise, wait for the info to free up */
				ldap_pvt_thread_yield();
				goto again;
			}
			/* If this info isn't hooked up to its parent yet,
			 * unlock and wait for it to be fully initialized
			 */
			if ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) {
				bdb_cache_entryinfo_unlock( *eip );
				ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
				ldap_pvt_thread_yield();
				goto again;
			}
			flag |= ID_LOCKED;
		}
		ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
	}

	/* See if the ID exists in the database; add it to the cache if so */
	if ( !*eip ) {
#ifndef BDB_HIER
		rc = bdb_id2entry( op->o_bd, tid, id, &ep );
		if ( rc == 0 ) {
			rc = bdb_cache_find_ndn( op, tid,
				&ep->e_nname, eip );
			if ( *eip ) flag |= ID_LOCKED;
			if ( rc ) {
				ep->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
				bdb_entry_return( bdb, ep, (*eip)->bei_zseq );
#else
				bdb_entry_return( ep );
#endif
				ep = NULL;
			}
		}
#else
		rc = hdb_cache_find_parent(op, tid, id, eip );
		if ( rc == 0 ) flag |= ID_LOCKED;
#endif
	}

	/* Ok, we found the info, do we have the entry? */
	if ( rc == 0 ) {
		if ( !( flag & ID_LOCKED )) {
			bdb_cache_entryinfo_lock( *eip );
			flag |= ID_LOCKED;
		}
		if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
			rc = DB_NOTFOUND;
		} else {
			(*eip)->bei_finders++;
			(*eip)->bei_state |= CACHE_ENTRY_REFERENCED;
			if ( flag & ID_NOENTRY ) {
				bdb_cache_entryinfo_unlock( *eip );
				return 0;
			}
			/* Make sure only one thread tries to load the entry */
load1:
#ifdef SLAP_ZONE_ALLOC
			if ((*eip)->bei_e && !slap_zn_validate(
					bdb->bi_cache.c_zctx, (*eip)->bei_e, (*eip)->bei_zseq)) {
				(*eip)->bei_e = NULL;
				(*eip)->bei_zseq = 0;
			}
#endif
			if ( !(*eip)->bei_e && !((*eip)->bei_state & CACHE_ENTRY_LOADING)) {
				load = 1;
				(*eip)->bei_state |= CACHE_ENTRY_LOADING;
				flag |= ID_CHKPURGE;
			}

			if ( !load ) {
				/* Clear the uncached state if we are not
				 * loading it, i.e. it is already cached or
				 * another thread is currently loading it.
				 */
				if ( (*eip)->bei_state & CACHE_ENTRY_NOT_CACHED ) {
					(*eip)->bei_state ^= CACHE_ENTRY_NOT_CACHED;
					flag |= ID_CHKPURGE;
				}
			}

			if ( flag & ID_LOCKED ) {
				bdb_cache_entryinfo_unlock( *eip );
				flag ^= ID_LOCKED;
			}
			rc = bdb_cache_entry_db_lock( bdb, tid, *eip, load, 0, lock );
			if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
				rc = DB_NOTFOUND;
				bdb_cache_entry_db_unlock( bdb, lock );
				bdb_cache_entryinfo_lock( *eip );
				(*eip)->bei_finders--;
				bdb_cache_entryinfo_unlock( *eip );
			} else if ( rc == 0 ) {
				if ( load ) {
					if ( !ep ) {
						rc = bdb_id2entry( op->o_bd, tid, id, &ep );
					}
					if ( rc == 0 ) {
						ep->e_private = *eip;
#ifdef BDB_HIER
						while ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED )
							ldap_pvt_thread_yield();
						bdb_fix_dn( ep, 0 );
#endif
						bdb_cache_entryinfo_lock( *eip );

						(*eip)->bei_e = ep;
#ifdef SLAP_ZONE_ALLOC
						(*eip)->bei_zseq = *((ber_len_t *)ep - 2);
#endif
						ep = NULL;
						if ( flag & ID_NOCACHE ) {
							/* Set the cached state only if no other thread
							 * found the info while we were loading the entry.
							 */
							if ( (*eip)->bei_finders == 1 ) {
								(*eip)->bei_state |= CACHE_ENTRY_NOT_CACHED;
								flag ^= ID_CHKPURGE;
							}
						}
						bdb_cache_entryinfo_unlock( *eip );
						bdb_cache_lru_link( bdb, *eip );
					}
					if ( rc == 0 ) {
						/* If we succeeded, downgrade back to a readlock. */
						rc = bdb_cache_entry_db_relock( bdb, tid,
							*eip, 0, 0, lock );
					} else {
						/* Otherwise, release the lock. */
						bdb_cache_entry_db_unlock( bdb, lock );
					}
				} else if ( !(*eip)->bei_e ) {
					/* Some other thread is trying to load the entry,
					 * wait for it to finish.
					 */
					bdb_cache_entry_db_unlock( bdb, lock );
					bdb_cache_entryinfo_lock( *eip );
					flag |= ID_LOCKED;
					goto load1;
#ifdef BDB_HIER
				} else {
					/* Check for subtree renames */
					rc = bdb_fix_dn( (*eip)->bei_e, 1 );
					if ( rc ) {
						bdb_cache_entry_db_relock( bdb,
							tid, *eip, 1, 0, lock );
						/* check again in case other modifier did it already */
						if ( bdb_fix_dn( (*eip)->bei_e, 1 ) )
							rc = bdb_fix_dn( (*eip)->bei_e, 2 );
						bdb_cache_entry_db_relock( bdb,
							tid, *eip, 0, 0, lock );
					}
#endif
				}
				bdb_cache_entryinfo_lock( *eip );
				(*eip)->bei_finders--;
				if ( load )
					(*eip)->bei_state ^= CACHE_ENTRY_LOADING;
				bdb_cache_entryinfo_unlock( *eip );
			}
		}
	}
	if ( flag & ID_LOCKED ) {
		bdb_cache_entryinfo_unlock( *eip );
	}
	if ( ep ) {
		ep->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
		bdb_entry_return( bdb, ep, (*eip)->bei_zseq );
#else
		bdb_entry_return( ep );
#endif
	}
	if ( rc == 0 ) {
		int purge = 0;

		if (( flag & ID_CHKPURGE ) || bdb->bi_cache.c_eimax ) {
			ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
			if ( flag & ID_CHKPURGE ) {
				bdb->bi_cache.c_cursize++;
				if ( !bdb->bi_cache.c_purging &&
					bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) {
					purge = 1;
					bdb->bi_cache.c_purging = 1;
				}
			} else if ( !bdb->bi_cache.c_purging && bdb->bi_cache.c_eimax &&
				bdb->bi_cache.c_leaves > bdb->bi_cache.c_eimax ) {
				purge = 1;
				bdb->bi_cache.c_purging = 1;
			}
			ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );
		}
		if ( purge )
			bdb_cache_lru_purge( bdb );
	}

#ifdef SLAP_ZONE_ALLOC
	if (rc == 0 && (*eip)->bei_e) {
		slap_zn_rlock(bdb->bi_cache.c_zctx, (*eip)->bei_e);
	}
	slap_zh_runlock(bdb->bi_cache.c_zctx);
#endif
	return rc;
}
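/* Illustrative sketch only: a hypothetical caller of bdb_cache_find_id(),
 * showing how it pairs with bdb_cache_return_entry_rw() above. The helper
 * name example_read_entry and its simplified error handling are assumptions
 * for illustration; real callers live in other back-bdb source files.
 */
#if 0
static int
example_read_entry( Operation *op, DB_TXN *tid, ID id )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	EntryInfo *ei = NULL;
	DB_LOCK lock;
	int rc;

	/* flag 0: load the entry if needed; on success it is read-locked */
	rc = bdb_cache_find_id( op, tid, id, &ei, 0, &lock );
	if ( rc ) return rc;	/* e.g. DB_NOTFOUND */

	/* ... read ei->bei_e here while the read lock is held ... */

	/* release the DB lock; frees the entry if it was left uncached */
	bdb_cache_return_entry_rw( bdb, ei->bei_e, 0, &lock );
	return 0;
}
#endif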
/* This is best-effort only. If all entries in the cache are
 * busy, they will all be kept. This is unlikely to happen
 * unless the cache is very much smaller than the working set.
 */
static void
bdb_cache_lru_purge( struct bdb_info *bdb )
{
	DB_LOCK		lock, *lockp;
	EntryInfo *elru, *elnext = NULL;
	int islocked;
	ID eicount, ecount;
	ID count, efree, eifree = 0;
#ifdef LDAP_DEBUG
	int iter;
#endif

	/* Wait for the mutex; we're the only one trying to purge. */
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );

	if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) {
		efree = bdb->bi_cache.c_cursize - bdb->bi_cache.c_maxsize;
		efree += bdb->bi_cache.c_minfree;
	} else {
		efree = 0;
	}

	/* maximum number of EntryInfo leaves to cache. In slapcat
	 * we always free all leaf nodes.
	 */
	if ( slapMode & SLAP_TOOL_READONLY ) {
		eifree = bdb->bi_cache.c_leaves;
	} else if ( bdb->bi_cache.c_eimax &&
		bdb->bi_cache.c_leaves > bdb->bi_cache.c_eimax ) {
		eifree = bdb->bi_cache.c_minfree * 10;
		if ( eifree >= bdb->bi_cache.c_leaves )
			eifree /= 2;
	}

	if ( !efree && !eifree ) {
		ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
		bdb->bi_cache.c_purging = 0;
		return;
	}

	if ( bdb->bi_cache.c_txn ) {
		lockp = &lock;
	} else {
		lockp = NULL;
	}

	count = 0;
	eicount = 0;
	ecount = 0;
#ifdef LDAP_DEBUG
	iter = 0;
#endif

	/* Look for an unused entry to remove */
	for ( elru = bdb->bi_cache.c_lruhead; elru; elru = elnext ) {
		elnext = elru->bei_lrunext;

		if ( bdb_cache_entryinfo_trylock( elru ))
			goto bottom;

		/* This flag implements the clock replacement behavior */
		if ( elru->bei_state & ( CACHE_ENTRY_REFERENCED )) {
			elru->bei_state &= ~CACHE_ENTRY_REFERENCED;
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* If this node is in the process of linking into the cache,
		 * or this node is being deleted, skip it.
		 */
		if (( elru->bei_state & ( CACHE_ENTRY_NOT_LINKED |
			CACHE_ENTRY_DELETED | CACHE_ENTRY_LOADING |
			CACHE_ENTRY_ONELEVEL )) ||
			elru->bei_finders > 0 ) {
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		if ( bdb_cache_entryinfo_trylock( elru->bei_parent )) {
			bdb_cache_entryinfo_unlock( elru );
			goto bottom;
		}

		/* entryinfo is locked */
		islocked = 1;

		/* If we can successfully writelock it, then
		 * the object is idle.
		 */
		if ( bdb_cache_entry_db_lock( bdb,
			bdb->bi_cache.c_txn, elru, 1, 1, lockp ) == 0 ) {

			/* Free entry for this node if it's present */
			if ( elru->bei_e ) {
				ecount++;

				/* the cache may have gone over the limit while we
				 * weren't looking, so double check.
				 */
				if ( !efree && ecount > bdb->bi_cache.c_maxsize )
					efree = bdb->bi_cache.c_minfree;

				if ( count < efree ) {
					elru->bei_e->e_private = NULL;
#ifdef SLAP_ZONE_ALLOC
					bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
#else
					bdb_entry_return( elru->bei_e );
#endif
					elru->bei_e = NULL;
					count++;
				} else {
					/* Keep this node cached, skip to next */
					bdb_cache_entry_db_unlock( bdb, lockp );
					goto next;
				}
			}
			bdb_cache_entry_db_unlock( bdb, lockp );

			/*
			 * If it is a leaf node, and we're over the limit, free it.
			 */
			if ( elru->bei_kids ) {
				/* Drop from list, we ignore it... */
				LRU_DEL( &bdb->bi_cache, elru );
			} else if ( eicount < eifree ) {
				/* Too many leaf nodes, free this one */
				bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
				bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
				islocked = 0;
				eicount++;
			}	/* Leave on list until we need to free it */
		}

next:
		if ( islocked ) {
			bdb_cache_entryinfo_unlock( elru );
			bdb_cache_entryinfo_unlock( elru->bei_parent );
		}

		if ( count >= efree && eicount >= eifree )
			break;
bottom:
		if ( elnext == bdb->bi_cache.c_lruhead )
			break;
#ifdef LDAP_DEBUG
		iter++;
#endif
	}

	if ( count || ecount > bdb->bi_cache.c_cursize ) {
		ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
		/* HACK: we seem to be losing track, fix up now */
		if ( ecount > bdb->bi_cache.c_cursize )
			bdb->bi_cache.c_cursize = ecount;
		bdb->bi_cache.c_cursize -= count;
		ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );
	}
	bdb->bi_cache.c_lruhead = elnext;
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
	bdb->bi_cache.c_purging = 0;
}
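/* The CACHE_ENTRY_REFERENCED handling above is a clock (second-chance)
 * replacement policy. A minimal, standalone sketch of that policy follows;
 * the types and names are hypothetical and unrelated to back-bdb.
 */
#if 0
struct clock_node {
	struct clock_node	*next;		/* circular list, like the LRU ring */
	int			referenced;	/* analogue of CACHE_ENTRY_REFERENCED */
};

/* Pick a victim, giving referenced nodes one more pass around the ring. */
static struct clock_node *
clock_pick_victim( struct clock_node *hand )
{
	struct clock_node *start = hand;

	do {
		if ( hand->referenced ) {
			hand->referenced = 0;	/* second chance: clear and skip */
		} else {
			return hand;		/* idle and unreferenced: evict */
		}
		hand = hand->next;
	} while ( hand != start );
	return NULL;				/* everything was recently used */
}
#endif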
/* Find the EntryInfo for the requested DN. If the DN cannot be found, return
 * the info for its closest ancestor. *res should be NULL to process a
 * complete DN starting from the tree root. Otherwise *res must be the
 * immediate parent of the requested DN, and only the RDN will be searched.
 * The EntryInfo is locked upon return and must be unlocked by the caller.
 */
int
bdb_cache_find_ndn(
	Operation	*op,
	DB_TXN		*txn,
	struct berval	*ndn,
	EntryInfo	**res )
{
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
	EntryInfo	ei, *eip, *ei2;
	int rc = 0;
	char *ptr;

	/* this function is always called with normalized DN */
	if ( *res ) {
		/* we're doing a onelevel search for an RDN */
		ei.bei_nrdn.bv_val = ndn->bv_val;
		ei.bei_nrdn.bv_len = dn_rdnlen( op->o_bd, ndn );
		eip = *res;
	} else {
		/* we're searching a full DN from the root */
		ptr = ndn->bv_val + ndn->bv_len - op->o_bd->be_nsuffix[0].bv_len;
		ei.bei_nrdn.bv_val = ptr;
		ei.bei_nrdn.bv_len = op->o_bd->be_nsuffix[0].bv_len;
		/* Skip to next rdn if suffix is empty */
		if ( ei.bei_nrdn.bv_len == 0 ) {
			for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val
				&& !DN_SEPARATOR(*ptr); ptr--) /* empty */;
			if ( ptr >= ndn->bv_val ) {
				if (DN_SEPARATOR(*ptr)) ptr++;
				ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr;
				ei.bei_nrdn.bv_val = ptr;
			}
		}
		eip = &bdb->bi_cache.c_dntree;
	}

	for ( bdb_cache_entryinfo_lock( eip ); eip; ) {
		eip->bei_state |= CACHE_ENTRY_REFERENCED;
		ei.bei_parent = eip;
		ei2 = (EntryInfo *)avl_find( eip->bei_kids, &ei, bdb_rdn_cmp );
		if ( !ei2 ) {
			DB_LOCK lock;
			int len = ei.bei_nrdn.bv_len;

			if ( BER_BVISEMPTY( ndn )) {
				*res = eip;
				return LDAP_SUCCESS;
			}

			ei.bei_nrdn.bv_len = ndn->bv_len -
				(ei.bei_nrdn.bv_val - ndn->bv_val);
			eip->bei_finders++;
			bdb_cache_entryinfo_unlock( eip );

			BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Reading %s",
				ei.bei_nrdn.bv_val );

			lock.mode = DB_LOCK_NG;
			rc = bdb_dn2id( op, &ei.bei_nrdn, &ei, txn, &lock );
			if (rc) {
				bdb_cache_entryinfo_lock( eip );
				eip->bei_finders--;
				bdb_cache_entry_db_unlock( bdb, &lock );
				*res = eip;
				return rc;
			}

			BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Read got %s(%d)",
				ei.bei_nrdn.bv_val, ei.bei_id );

			/* DN exists but needs to be added to cache */
			ei.bei_nrdn.bv_len = len;
			rc = bdb_entryinfo_add_internal( bdb, &ei, &ei2 );
			/* add_internal left eip and c_rwlock locked */
			eip->bei_finders--;
			ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
			bdb_cache_entry_db_unlock( bdb, &lock );
			if ( rc ) {
				*res = eip;
				return rc;
			}
		} else if ( ei2->bei_state & CACHE_ENTRY_DELETED ) {
			/* In the midst of deleting? Give it a chance to
			 * complete.
			 */
			bdb_cache_entryinfo_unlock( eip );
			ldap_pvt_thread_yield();
			bdb_cache_entryinfo_lock( eip );
			*res = eip;
			return DB_NOTFOUND;
		}
		bdb_cache_entryinfo_lock( ei2 );
		bdb_cache_entryinfo_unlock( eip );

		eip = ei2;

		/* Advance to next lower RDN */
		for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val
			&& !DN_SEPARATOR(*ptr); ptr--) /* empty */;
		if ( ptr >= ndn->bv_val ) {
			if (DN_SEPARATOR(*ptr)) ptr++;
			ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr - 1;
			ei.bei_nrdn.bv_val = ptr;
		}
		if ( ptr < ndn->bv_val ) {
			*res = eip;
			break;
		}
	}

	return rc;
}
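/* Illustrative sketch only: the two calling modes documented above for
 * bdb_cache_find_ndn(). The wrapper name example_lookup_dn is an assumption;
 * real callers such as the dn2entry path handle more cases.
 */
#if 0
static int
example_lookup_dn( Operation *op, DB_TXN *txn, struct berval *ndn )
{
	EntryInfo *ei = NULL;
	int rc;

	/* Mode 1: *res == NULL, walk the full DN from the tree root */
	rc = bdb_cache_find_ndn( op, txn, ndn, &ei );

	/* On success ei is the exact match; on failure (e.g. DB_NOTFOUND)
	 * it is the closest ancestor. Either way it is returned locked
	 * and must be unlocked by the caller.
	 */
	if ( ei )
		bdb_cache_entryinfo_unlock( ei );

	/* Mode 2 (not shown): pass a locked parent back in via *res to
	 * resolve a single child RDN with a onelevel lookup.
	 */
	return rc;
}
#endif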
void
bdb_cache_return_entry_rw(
	DB_ENV *env, Cache *cache, Entry *e, int rw, DB_LOCK *lock )
{
	ID id;
	int refcnt, freeit = 1;

	/* set cache write lock */
	ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );

	assert( e->e_private );

	bdb_cache_entry_db_unlock( env, lock );
#if 0
	bdb_cache_entry_rdwr_unlock(e, rw);
#endif

	id = e->e_id;
	refcnt = --BEI(e)->bei_refcnt;

	/*
	 * if the entry is returned when in CREATING state, it is deleted
	 * but not freed because it may belong to someone else (do_add,
	 * for instance)
	 */
	if ( BEI(e)->bei_state == CACHE_ENTRY_CREATING ) {
		/* set lru mutex */
		ldap_pvt_thread_mutex_lock( &cache->lru_mutex );
		bdb_cache_delete_entry_internal( cache, e );
		/* free lru mutex */
		ldap_pvt_thread_mutex_unlock( &cache->lru_mutex );
		freeit = 0;
		/* now the entry is in DELETED state */
	}

	if ( BEI(e)->bei_state == CACHE_ENTRY_COMMITTED ) {
		BEI(e)->bei_state = CACHE_ENTRY_READY;

		/* free cache write lock */
		ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );

#ifdef NEW_LOGGING
		LDAP_LOG( CACHE, DETAIL1,
			"bdb_cache_return_entry_rw: return (%ld):%s, refcnt=%d\n",
			id, rw ? "w" : "r", refcnt );
#else
		Debug( LDAP_DEBUG_TRACE,
			"====> bdb_cache_return_entry_%s( %ld ): created (%d)\n",
			rw ? "w" : "r", id, refcnt );
#endif
	} else if ( BEI(e)->bei_state == CACHE_ENTRY_DELETED ) {
		if( refcnt > 0 ) {
			/* free cache write lock */
			ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );

#ifdef NEW_LOGGING
			LDAP_LOG( CACHE, DETAIL1,
				"bdb_cache_return_entry_rw: %ld, delete pending (%d).\n",
				id, refcnt, 0 );
#else
			Debug( LDAP_DEBUG_TRACE,
				"====> bdb_cache_return_entry_%s( %ld ): delete pending (%d)\n",
				rw ? "w" : "r", id, refcnt );
#endif
		} else {
			bdb_cache_entry_private_destroy( e );
			if ( freeit ) {
				bdb_entry_return( e );
			}

			/* free cache write lock */
			ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );

#ifdef NEW_LOGGING
			LDAP_LOG( CACHE, DETAIL1,
				"bdb_cache_return_entry_rw: (%ld): deleted (%d)\n",
				id, refcnt, 0 );
#else
			Debug( LDAP_DEBUG_TRACE,
				"====> bdb_cache_return_entry_%s( %ld ): deleted (%d)\n",
				rw ? "w" : "r", id, refcnt );
#endif
		}
	} else {
		/* free cache write lock */
		ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );

#ifdef NEW_LOGGING
		LDAP_LOG( CACHE, DETAIL1,
			"bdb_cache_return_entry_rw: ID %ld:%s returned (%d)\n",
			id, rw ? "w": "r", refcnt );
#else
		Debug( LDAP_DEBUG_TRACE,
			"====> bdb_cache_return_entry_%s( %ld ): returned (%d)\n",
			rw ? "w" : "r", id, refcnt);
#endif
	}
}
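/* Illustrative note on the legacy variant above: it dispatches on a small
 * per-entry state machine plus a reference count. The summary below and the
 * helper entry_state_name() are assumptions added for illustration; only the
 * CACHE_ENTRY_* state names come from the code itself.
 */
#if 0
/* CREATING  -> removed from the cache but the Entry is not freed
 *              (its creator, e.g. do_add, still owns it)
 * COMMITTED -> marked READY and left cached
 * DELETED   -> private data destroyed and the Entry freed once refcnt == 0
 * otherwise -> simply returned and logged
 */
static const char *
entry_state_name( int state )
{
	switch ( state ) {
	case CACHE_ENTRY_CREATING:	return "creating";
	case CACHE_ENTRY_COMMITTED:	return "committed";
	case CACHE_ENTRY_DELETED:	return "deleted";
	case CACHE_ENTRY_READY:		return "ready";
	default:			return "other";
	}
}
#endif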