Skip to content

Commit

Permalink
Invalidate view indexes when documents are purged
Browse files Browse the repository at this point in the history
After a document is purged, view indexes may contain obsolete rows that
were emitted by that document. If the doc is still in ForestDB's tree,
the right things will happen -- the fdb_del() call incremented the db's
lastSequence, triggering a view index update, and the deleted doc will
be enumerated and have its rows removed.
But if the doc has already been removed from ForestDB's tree, the
indexer won't see it at all and won't know to remove the obsolete rows.

The fix is based on what CouchDB does: we store a purgeCount in the db
which is incremented after every database compaction that purges deleted
docs. A view index also saves the current db purgeCount. If the
purgeCount has increased, the index is obsolete and needs to be erased
and rebuilt.

In the process of fixing this I also found some bugs in the C API
where deleted/purged docs weren't being handled properly: they were
skipped entirely, when they should be passed to the indexer even though
the client app doesn't map them. Fixed.

Fixes #1164
  • Loading branch information
snej committed Mar 17, 2016
1 parent e941871 commit cba8a28
Show file tree
Hide file tree
Showing 15 changed files with 256 additions and 69 deletions.
6 changes: 4 additions & 2 deletions C/c4Database.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,10 @@ bool c4db_purgeDoc(C4Database *database, C4Slice docID, C4Error *outError) {
if (!database->mustBeInTransaction(outError))
return false;
try {
database->transaction()->del(docID);
return true;
if (database->transaction()->del(docID))
return true;
else
recordError(ForestDBDomain, FDB_RESULT_KEY_NOT_FOUND, outError);
} catchError(outError)
return false;
}
Expand Down
14 changes: 7 additions & 7 deletions C/c4DocEnumerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ struct C4DocEnumerator {
options.descending = (c4options.flags & kC4Descending) != 0;
options.inclusiveStart = (c4options.flags & kC4InclusiveStart) != 0;
options.inclusiveEnd = (c4options.flags & kC4InclusiveEnd) != 0;
options.includeDeleted = (c4options.flags & kC4IncludePurged) != 0;
// (Remember, ForestDB's 'deleted' is what CBL calls 'purged')
if ((c4options.flags & kC4IncludeBodies) == 0)
options.contentOptions = KeyStore::kMetaOnly;
return options;
}

typedef std::function<bool(slice docID, sequence sequence, slice docType)> filter;

void setFilter(const filter &f) {_filter = f;}
void setFilter(const EnumFilter &f) {_filter = f;}

C4Database* database() const {return _database;}

Expand Down Expand Up @@ -109,7 +109,7 @@ struct C4DocEnumerator {
// Return it anyway, without the kExists flag.
_docFlags = 0;
_docRevID = revid();
return true;
return (!_filter || _filter(_e.doc(), 0, slice::null));
}
VersionedDocument::Flags flags;
if (!VersionedDocument::readMeta(_e.doc(), flags, _docRevID, docType))
Expand All @@ -118,13 +118,13 @@ struct C4DocEnumerator {
auto optFlags = _options.flags;
return (optFlags & kC4IncludeDeleted || !(_docFlags & VersionedDocument::kDeleted))
&& (optFlags & kC4IncludeNonConflicted || (_docFlags & VersionedDocument::kConflicted))
&& (!_filter || _filter(_e.doc().key(), _e.doc().sequence(), docType));
&& (!_filter || _filter(_e.doc(), _docFlags, docType));
}

C4Database *_database;
DocEnumerator _e;
C4EnumeratorOptions _options;
filter _filter;
EnumFilter _filter;

C4DocumentFlags _docFlags;
revid _docRevID;
Expand Down Expand Up @@ -184,7 +184,7 @@ C4DocEnumerator* c4db_enumerateSomeDocs(C4Database *database,
}

namespace c4Internal {
void setEnumFilter(C4DocEnumerator *e, C4DocEnumerator::filter f) {
void setEnumFilter(C4DocEnumerator *e, EnumFilter f) {
e->setFilter(f);
}
}
Expand Down
14 changes: 12 additions & 2 deletions C/c4Impl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "slice.hh"
#include "Database.hh"
#include "Collatable.hh"
#include "VersionedDocument.hh"
#include "Error.hh"
#include <functional>

Expand Down Expand Up @@ -78,8 +79,11 @@ namespace c4Internal {

const VersionedDocument& versionedDocument(C4Document*);

void setEnumFilter(C4DocEnumerator*,
std::function<bool(slice docID, sequence sequence, slice docType)> filter);
typedef std::function<bool(const Document&,
uint32_t documentFlags, // C4DocumentFlags
slice docType)> EnumFilter;

void setEnumFilter(C4DocEnumerator*, EnumFilter);
}

using namespace c4Internal;
Expand Down Expand Up @@ -133,4 +137,10 @@ struct c4KeyValueList {
std::vector<alloc_slice> values;
};


// Internal C4EnumeratorFlags value. Includes purged docs (what ForestDB calls 'deleted').
// Should only need to be used for the view indexer's enumerator.
static const uint16_t kC4IncludePurged = 0x8000;


#endif /* c4Impl_h */
27 changes: 14 additions & 13 deletions C/c4View.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ struct c4View {
C4Slice version)
:_sourceDB(sourceDB),
_viewDB((std::string)path, config),
_index(&_viewDB, (std::string)name, sourceDB->defaultKeyStore())
_index(&_viewDB, (std::string)name, sourceDB)
{
setVersion(version);
}
Expand Down Expand Up @@ -199,8 +199,7 @@ struct c4Indexer : public MapReduceIndexer {
virtual ~c4Indexer() { }

void addView(C4View *view) {
auto t = new Transaction(&view->_viewDB);
addIndex(&view->_index, t);
addIndex(&view->_index);
}

C4Database* _db;
Expand Down Expand Up @@ -240,22 +239,24 @@ C4DocEnumerator* c4indexer_enumerateDocuments(C4Indexer *indexer, C4Error *outEr
}

auto options = kC4DefaultEnumeratorOptions;
options.flags |= kC4IncludeDeleted;
options.flags |= kC4IncludeDeleted | kC4IncludePurged;
if (docTypes)
options.flags &= ~kC4IncludeBodies;
auto e = c4db_enumerateChanges(indexer->_db, startSequence-1, &options, outError);
if (!e)
return NULL;

if (docTypes) {
setEnumFilter(e, [docTypes,indexer](slice docID, sequence sequence, slice docType) {
if (docTypes->count(docType) > 0)
return true;
// We're skipping this doc, but we do have to update the index to _remove_ it
indexer->skipDoc(docID, sequence);
return false;
});
}
setEnumFilter(e, [docTypes,indexer](const Document &doc,
C4DocumentFlags flags,
slice docType) {
if ((flags & kExists) && !(flags & kDeleted)
&& (!docTypes || docTypes->count(docType) > 0))
return true;
// We're skipping this doc because it's either purged or deleted, or its docType
// doesn't match. But we do have to update the index to _remove_ it
indexer->skipDoc(doc.key(), doc.sequence());
return false;
});
return e;
} catchError(outError);
return NULL;
Expand Down
43 changes: 43 additions & 0 deletions C/tests/c4ViewTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ class C4ViewTest : public C4Test {
sprintf(docID, "doc-%03d", i);
createRev(c4str(docID), kRevID, kBody);
}
updateIndex();
}

void updateIndex() {
C4Error error;
C4Indexer* ind = c4indexer_begin(db, &view, 1, &error);
Assert(ind);
Expand Down Expand Up @@ -141,6 +144,44 @@ class C4ViewTest : public C4Test {
AssertEqual(c4view_getLastSequenceChangedAt(view), (C4SequenceNumber)0);
}

void testDocPurge() {testDocPurge(false);}
void testDocPurgeWithCompact() {testDocPurge(true);}

void testDocPurge(bool compactAfterPurge) {
createIndex();
auto lastIndexed = c4view_getLastSequenceIndexed(view);
auto lastSeq = c4db_getLastSequence(db);
AssertEqual(lastIndexed, lastSeq);

// Purge one of the indexed docs:
C4Error err;
{
TransactionHelper t(db);
Assert(c4db_purgeDoc(db, c4str("doc-023"), &err));
}

if (compactAfterPurge)
Assert(c4db_compact(db, &err));

// ForestDB assigns sequences to deletions, so the purge bumped the db's sequence,
// invalidating the view index:
lastIndexed = c4view_getLastSequenceIndexed(view);
lastSeq = c4db_getLastSequence(db);
Assert(lastIndexed < lastSeq);

updateIndex();

// Verify that the purged doc is no longer in the index:
C4Error error;
auto e = c4view_query(view, NULL, &error);
Assert(e);
int i = 0;
while (c4queryenum_next(e, &error)) {
++i;
}
AssertEqual(i, 198); // 2 rows of doc-023 are gone
}

void createFullTextIndex(unsigned docCount) {
char docID[20];
for (unsigned i = 1; i <= docCount; i++) {
Expand Down Expand Up @@ -243,6 +284,8 @@ class C4ViewTest : public C4Test {
CPPUNIT_TEST( testCreateIndex );
CPPUNIT_TEST( testQueryIndex );
CPPUNIT_TEST( testIndexVersion );
CPPUNIT_TEST( testDocPurge );
CPPUNIT_TEST( testDocPurgeWithCompact );
CPPUNIT_TEST( testCreateFullTextIndex );
CPPUNIT_TEST( testQueryFullTextIndex );
CPPUNIT_TEST_SUITE_END();
Expand Down
39 changes: 37 additions & 2 deletions CBForest Tests/Database_Test.mm
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ - (void) test01_DbInfo {
Assert(info.file_size > 0);

AssertEq(db->lastSequence(), 0u);
AssertEq(db->purgeCount(), 0u);
}

- (void)test02_CreateDoc
Expand Down Expand Up @@ -366,10 +367,15 @@ - (void) test06_TransactionsThenIterate {
- (void) test07_DeleteKey {
slice key("a");
Transaction(db).set(key, nsstring_slice(@"A"));
AssertEq(db->lastSequence(), 1u);
AssertEq(db->purgeCount(), 0u);
Transaction(db).del(key);
Document doc = db->get(key);
// Assert(doc.deleted());
Assert(!doc.exists());
AssertEq(db->lastSequence(), 2u);
AssertEq(db->purgeCount(), 0u); // doesn't increment until after compaction
db->compact();
AssertEq(db->purgeCount(), 1u);
}

- (void) test07_DeleteDoc {
Expand All @@ -383,8 +389,37 @@ - (void) test07_DeleteDoc {
}

Document doc = db->get(key);
// Assert(doc.deleted());
// Assert(doc.deleted());
Assert(!doc.exists());

AssertEq(db->purgeCount(), 0u); // doesn't increment until after compaction
db->compact();
AssertEq(db->purgeCount(), 1u);
}

// Tests workaround for ForestDB bug MB-18753
- (void) test07_DeleteDocAndReopen {
slice key("a");
Transaction(db).set(key, nsstring_slice(@"A"));

{
Transaction t(db);
Document doc = db->get(key);
t.del(doc);
}

Document doc = db->get(key);
// Assert(doc.deleted());
Assert(!doc.exists());

auto config = db->getConfig();
delete db;
db = NULL;
db = new Database(dbPath, config);

Document doc2 = db->get(key);
// Assert(doc2.deleted());
Assert(!doc2.exists());
}

- (void) test08_KeyStoreInfo {
Expand Down
6 changes: 2 additions & 4 deletions CBForest Tests/GeoIndex_Test.mm
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

static void updateIndex(Database *indexDB, MapReduceIndex* index) {
MapReduceIndexer indexer;
indexer.addIndex(index, new Transaction(indexDB));
indexer.addIndex(index);
auto seq = indexer.startingSequence();
numMapCalls = 0;

Expand Down Expand Up @@ -59,7 +59,6 @@ @implementation GeoIndex_Test
{
std::string dbPath;
Database *db;
KeyStore source;
MapReduceIndex *index;
}

Expand All @@ -75,8 +74,7 @@ - (void) setUp {
CreateTestDir();
dbPath = PathForDatabaseNamed(@"geo_temp.fdb");
db = new Database(dbPath, TestDBConfig());
source = (KeyStore)*db;
index = new MapReduceIndex(db, "geo", source);
index = new MapReduceIndex(db, "geo", db);
}

- (void) tearDown {
Expand Down
Loading

0 comments on commit cba8a28

Please sign in to comment.