From 8e00f8e730caf497c1cce4644cb6815b75efb565 Mon Sep 17 00:00:00 2001 From: Joris Goosen Date: Tue, 3 Sep 2024 17:37:38 +0200 Subject: [PATCH] add some pragmas to speed it all up (#5651) Don't add each column separately with ALTER TABLE; create the table from scratch instead, making sure that the filter-column is still first --- CommonData/column.cpp | 7 ++-- CommonData/column.h | 2 +- CommonData/databaseinterface.cpp | 63 +++++++++++++++++++++++--------- CommonData/databaseinterface.h | 5 ++- CommonData/dataset.cpp | 13 +++++-- CommonData/dataset.h | 4 +- 6 files changed, 67 insertions(+), 27 deletions(-) diff --git a/CommonData/column.cpp b/CommonData/column.cpp index 56db13a2dd..a12f0f3a89 100644 --- a/CommonData/column.cpp +++ b/CommonData/column.cpp @@ -82,7 +82,7 @@ void Column::dbDelete(bool cleanUpRest) { assert(_id != -1); - labelsClear(); + labelsClear(false); db().columnDelete(_id, cleanUpRest); _id = -1; @@ -649,13 +649,14 @@ void Column::_sortLabelsByOrder() std::sort(_labels.begin(), _labels.end(), [](const Label * l, const Label * r) { return l->order() < r->order(); }); } -void Column::labelsClear() +void Column::labelsClear(bool doIncRevision) { db().labelsClear(_id); _labels.clear(); _labelByIntsIdMap.clear(); - incRevision(false); + if(doIncRevision) + incRevision(false); } void Column::beginBatchedLabelsDB() diff --git a/CommonData/column.h b/CommonData/column.h index 0dddd1cbfe..64a4a3236d 100644 --- a/CommonData/column.h +++ b/CommonData/column.h @@ -109,7 +109,7 @@ class Column : public DataSetBaseNode void upgradeSetDoubleLabelsInInts(); ///< Used by upgrade 0.18.* -> 0.19 void upgradeExtractDoublesIntsFromLabels(); ///< Used by upgrade 0.18.* -> 0.19 - void labelsClear(); + void labelsClear(bool doIncRevision=true); int labelsAdd( int display); int labelsAdd( const std::string & display); int labelsAdd( const std::string & display, const std::string & description, const Json::Value & originalValue); diff --git a/CommonData/databaseinterface.cpp 
b/CommonData/databaseinterface.cpp index 7e9415c1bd..e0fea761fa 100644 --- a/CommonData/databaseinterface.cpp +++ b/CommonData/databaseinterface.cpp @@ -75,7 +75,7 @@ int DatabaseInterface::dataSetInsert(const std::string & dataFilePath, long data transactionWriteBegin(); int id = runStatementsId("INSERT INTO DataSets (dataFilePath, dataFileTimestamp, description, databaseJson, emptyValuesJson, dataFileSynch) VALUES (?, ?, ?, ?, ?, ?) RETURNING id;", prepare); - runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);"); + runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);"); // Can be overwritten through dataSetCreateTable transactionWriteEnd(); return id; @@ -183,6 +183,7 @@ void DatabaseInterface::filterDelete(int filterIndex) if(dataSetId != -1) runStatements("ALTER TABLE " + dataSetName(dataSetId) + " DROP COLUMN " + filterName(filterIndex) + ";"); + runStatements("DELETE FROM Filters WHERE id = " + std::to_string(filterIndex) + ";"); transactionWriteEnd(); @@ -391,7 +392,7 @@ void DatabaseInterface::filterWrite(int filterIndex, const std::vector & v transactionWriteEnd(); } -int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType) +int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType, bool alterTable) { JASPTIMER_SCOPE(DatabaseInterface::columnInsert); transactionWriteBegin(); @@ -419,19 +420,38 @@ int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string Log::log() << "Inserting column failed!" 
<< std::endl; #endif - //Add a scalar and ordinal/nominal column to DataSet_# for the column - const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId); - const std::string addColumnFragment = " ADD " + columnBaseName(columnId); - - runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;"); - runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;"); - + + if(alterTable) //If not then via dataSetCreateTable + { + //Add a scalar and ordinal/nominal column to DataSet_# for the column + const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId); + const std::string addColumnFragment = " ADD " + columnBaseName(columnId); + + runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;"); + runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;"); + } + //The labels will be added separately later transactionWriteEnd(); return columnId; } +void DatabaseInterface::dataSetCreateTable(DataSet * dataSet) +{ + runStatements("DROP TABLE " + dataSetName(dataSet->id()) + ";"); + + std::stringstream statements; + statements << "CREATE TABLE " + dataSetName(dataSet->id()) + " (rowNumber INTEGER PRIMARY KEY, "+ filterName(dataSet->filter()->id()) + " INT NOT NULL DEFAULT 1"; + + for(Column * column : dataSet->columns()) + statements << ", " << columnBaseName(column->id()) << "_DBL REAL NULL, " << columnBaseName(column->id()) << "_INT INT NULL"; + + statements << ");"; + + runStatements(statements.str()); +} + int DatabaseInterface::columnGetDataSetId(int columnId) { JASPTIMER_SCOPE(DatabaseInterface::columnGetDataSetId); @@ -1438,19 +1458,19 @@ void DatabaseInterface::_runStatements(const std::string & statements, bindParam } while(remain > 1 && (ret == SQLITE_OK && ret != SQLITE_DONE)); + const int maxLenStatementError = 200; + std::string shortStatements = statements.size() <= maxLenStatementError ? 
statements : statements.substr(0, maxLenStatementError); + if(ret == SQLITE_ERROR) { - std::string errorMsg = "Running ```\n"+statements+"\n``` failed because of: `" + sqlite3_errmsg(_db); - Log::log() << errorMsg << std::endl; - - throw std::runtime_error(errorMsg); + Log::log() << "Running ```\n"+statements +"\n``` failed because of: `" + sqlite3_errmsg(_db) << std::endl; + throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because of: `" + sqlite3_errmsg(_db)); } if(ret == SQLITE_READONLY) { - std::string errorMsg = "Running ```\n"+statements+"\n``` failed because the database is readonly..."; - Log::log() << errorMsg << std::endl; - throw std::runtime_error(errorMsg); + Log::log() << "Running ```\n"+statements +"\n``` failed because the database is readonly..." << std::endl; + throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because the database is readonly..."); } } @@ -1570,7 +1590,8 @@ void DatabaseInterface::create() else Log::log() << "Opened internal sqlite database for creation at '" << dbFile() << "'." << std::endl; - + dbStartUpPragmas(); + transactionWriteBegin(); runStatements(_dbConstructionSql); transactionWriteEnd(); @@ -1593,6 +1614,14 @@ void DatabaseInterface::load() } else Log::log() << "Opened internal sqlite database for loading at '" << dbFile() << "'." 
<< std::endl; + + dbStartUpPragmas(); +} + +void DatabaseInterface::dbStartUpPragmas() +{ + runStatements("pragma journal_mode = WAL;"); + runStatements("pragma synchronous = normal;"); } void DatabaseInterface::close() diff --git a/CommonData/databaseinterface.h b/CommonData/databaseinterface.h index b4fdfad644..98dfaed722 100644 --- a/CommonData/databaseinterface.h +++ b/CommonData/databaseinterface.h @@ -87,6 +87,7 @@ class DatabaseInterface int dataSetGetRevision( int dataSetId); int dataSetGetFilter( int dataSetId); void dataSetInsertEmptyRow( int dataSetId, size_t row); + void dataSetCreateTable( DataSet * dataSet); ///< Assumes you are importing fresh data and havent created any DataSet_? table yet void dataSetBatchedValuesUpdate(DataSet * data, std::vector columns, std::function progressCallback = [](float){}); void dataSetBatchedValuesUpdate(DataSet * data, std::function progressCallback = [](float){}); @@ -109,7 +110,7 @@ class DatabaseInterface //Columns & Data/Values //Index stuff: - int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct + int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown, bool alterTable=true); ///< Insert a row into Columns and create the corresponding columns in DataSet_? 
Also makes sure the indices are correct int columnLastFreeIndex( int dataSetId); void columnIndexIncrements( int dataSetId, int index); ///< If index already is in use that column and all after are incremented by 1 void columnIndexDecrements( int dataSetId, int index); ///< Indices bigger than index are decremented, assumption is that the previous one using it has been removed already @@ -158,6 +159,7 @@ class DatabaseInterface void transactionReadBegin(); ///< runs BEGIN DEFERRED and waits for sqlite to not be busy anymore if some other process is writing Tracks whether nested and only does BEGIN+COMMIT at lowest depth void transactionReadEnd(); ///< runs COMMIT and ends the transaction. Tracks whether nested and only does BEGIN+COMMIT at lowest depth + private: void _doubleTroubleBinder(sqlite3_stmt *stmt, int param, double dbl); ///< Needed to work around the lack of support for NAN, INF and NEG_INF in sqlite, converts those to string to make use of sqlite flexibility double _doubleTroubleReader(sqlite3_stmt *stmt, int colI); ///< The reading counterpart to _doubleTroubleBinder to convert string representations of NAN, INF and NEG_INF back to double @@ -168,6 +170,7 @@ class DatabaseInterface void load(); ///< Loads a sqlite database from sessiondir (after loading a jaspfile) void close(); ///< Closes the loaded database and disconnects bool tableHasColumn(const std::string & tableName, const std::string & columnName); + void dbStartUpPragmas(); int _transactionWriteDepth = 0, _transactionReadDepth = 0; diff --git a/CommonData/dataset.cpp b/CommonData/dataset.cpp index 5dde9073e4..59f82d2301 100644 --- a/CommonData/dataset.cpp +++ b/CommonData/dataset.cpp @@ -59,6 +59,7 @@ void DataSet::dbDelete() _dataSetID = -1; + db().transactionWriteEnd(); } @@ -171,12 +172,12 @@ void DataSet::removeColumn(const std::string & name) } } -void DataSet::insertColumn(size_t index) +void DataSet::insertColumn(size_t index, bool alterDataSetTable) { assert(_dataSetID > 0); - 
Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index)); + Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index, "", columnType::unknown, alterDataSetTable)); _columns.insert(_columns.begin()+index, newColumn); @@ -382,18 +383,24 @@ void DataSet::setColumnCount(size_t colCount) db().transactionWriteBegin(); int curCount = columns().size(); + + bool alterTableAfterwards = curCount == 0 && colCount > 0; if(colCount > curCount) for(size_t i=curCount; i=colCount; i--) removeColumn(i); + incRevision(); db().transactionWriteEnd(); + + if(alterTableAfterwards) + db().dataSetCreateTable(this); } void DataSet::setRowCount(size_t rowCount) diff --git a/CommonData/dataset.h b/CommonData/dataset.h index 62f5bacffc..04d59b3252 100644 --- a/CommonData/dataset.h +++ b/CommonData/dataset.h @@ -40,11 +40,11 @@ class DataSet : public DataSetBaseNode void beginBatchedToDB(); void endBatchedToDB(std::function progressCallback = [](float){}, Columns columns={}); void endBatchedToDB(Columns columns) { endBatchedToDB([](float){}, columns); } - + void removeColumn( const std::string & name ); void removeColumn( size_t index ); void removeColumnById( size_t id ); - void insertColumn( size_t index ); + void insertColumn( size_t index, bool alterDataSetTable = true); Column * newColumn( const std::string & name); int getColumnIndex( const std::string & name ) const; int columnIndex( const Column * col ) const;