Skip to content

Commit

Permalink
add some pragmas to speed it all up (#5651)
Browse files Browse the repository at this point in the history
Don't add each column separately; instead create the table from scratch, making sure that the filter column is still first.
  • Loading branch information
JorisGoosen committed Sep 30, 2024
1 parent 1b3b4b3 commit 8e00f8e
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 27 deletions.
7 changes: 4 additions & 3 deletions CommonData/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void Column::dbDelete(bool cleanUpRest)
{
assert(_id != -1);

labelsClear();
labelsClear(false);
db().columnDelete(_id, cleanUpRest);

_id = -1;
Expand Down Expand Up @@ -649,13 +649,14 @@ void Column::_sortLabelsByOrder()
std::sort(_labels.begin(), _labels.end(), [](const Label * l, const Label * r) { return l->order() < r->order(); });
}

void Column::labelsClear()
void Column::labelsClear(bool doIncRevision)
{
db().labelsClear(_id);
_labels.clear();
_labelByIntsIdMap.clear();

incRevision(false);
if(doIncRevision)
incRevision(false);
}

void Column::beginBatchedLabelsDB()
Expand Down
2 changes: 1 addition & 1 deletion CommonData/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Column : public DataSetBaseNode
void upgradeSetDoubleLabelsInInts(); ///< Used by upgrade 0.18.* -> 0.19
void upgradeExtractDoublesIntsFromLabels(); ///< Used by upgrade 0.18.* -> 0.19

void labelsClear();
void labelsClear(bool doIncRevision=true);
int labelsAdd( int display);
int labelsAdd( const std::string & display);
int labelsAdd( const std::string & display, const std::string & description, const Json::Value & originalValue);
Expand Down
63 changes: 46 additions & 17 deletions CommonData/databaseinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ int DatabaseInterface::dataSetInsert(const std::string & dataFilePath, long data

transactionWriteBegin();
int id = runStatementsId("INSERT INTO DataSets (dataFilePath, dataFileTimestamp, description, databaseJson, emptyValuesJson, dataFileSynch) VALUES (?, ?, ?, ?, ?, ?) RETURNING id;", prepare);
runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);");
runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);"); // Can be overwritten through dataSetCreateTable
transactionWriteEnd();

return id;
Expand Down Expand Up @@ -183,6 +183,7 @@ void DatabaseInterface::filterDelete(int filterIndex)

if(dataSetId != -1)
runStatements("ALTER TABLE " + dataSetName(dataSetId) + " DROP COLUMN " + filterName(filterIndex) + ";");

runStatements("DELETE FROM Filters WHERE id = " + std::to_string(filterIndex) + ";");

transactionWriteEnd();
Expand Down Expand Up @@ -391,7 +392,7 @@ void DatabaseInterface::filterWrite(int filterIndex, const std::vector<bool> & v
transactionWriteEnd();
}

int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType)
int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType, bool alterTable)
{
JASPTIMER_SCOPE(DatabaseInterface::columnInsert);
transactionWriteBegin();
Expand Down Expand Up @@ -419,19 +420,38 @@ int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string
Log::log() << "Inserting column failed!" << std::endl;
#endif

//Add a scalar and ordinal/nominal column to DataSet_# for the column
const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId);
const std::string addColumnFragment = " ADD " + columnBaseName(columnId);

runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;");
runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;");


if(alterTable) //If not then via dataSetCreateTable
{
//Add a scalar and ordinal/nominal column to DataSet_# for the column
const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId);
const std::string addColumnFragment = " ADD " + columnBaseName(columnId);

runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;");
runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;");
}

//The labels will be added separately later

transactionWriteEnd();
return columnId;
}

/// (Re)creates the DataSet_? table of dataSet with a single CREATE TABLE statement,
/// instead of adding the value columns one by one through ALTER TABLE.
///
/// The resulting table has, in this order: rowNumber (primary key), the filter column of
/// dataSet->filter(), and then a <base>_DBL and <base>_INT column for every entry in dataSet->columns().
///
/// An already existing DataSet_? table is dropped first, including whatever rows it holds,
/// so this should only be called before any values have been written to it.
void DatabaseInterface::dataSetCreateTable(DataSet * dataSet)
{
	const std::string tableName = dataSetName(dataSet->id());

	std::stringstream createTable;
	createTable << "CREATE TABLE " << tableName << " (rowNumber INTEGER PRIMARY KEY, " << filterName(dataSet->filter()->id()) << " INT NOT NULL DEFAULT 1";

	for(Column * column : dataSet->columns())
	{
		const std::string baseName = columnBaseName(column->id());
		createTable << ", " << baseName << "_DBL REAL NULL, " << baseName << "_INT INT NULL";
	}

	createTable << ");";

	//Drop + create inside one write transaction so we never end up committed without a DataSet_? table.
	//Write transactions are nested elsewhere in this class (e.g. columnInsert called during DataSet::setColumnCount), so this is safe to call from within one as well.
	transactionWriteBegin();

	//IF EXISTS: the table is normally there already (dataSetInsert creates a minimal one), but a missing table should not make this throw.
	runStatements("DROP TABLE IF EXISTS " + tableName + ";");
	runStatements(createTable.str());

	transactionWriteEnd();
}

int DatabaseInterface::columnGetDataSetId(int columnId)
{
JASPTIMER_SCOPE(DatabaseInterface::columnGetDataSetId);
Expand Down Expand Up @@ -1438,19 +1458,19 @@ void DatabaseInterface::_runStatements(const std::string & statements, bindParam
}
while(remain > 1 && (ret == SQLITE_OK && ret != SQLITE_DONE));

const int maxLenStatementError = 200;
std::string shortStatements = statements.size() <= maxLenStatementError ? statements : statements.substr(0, maxLenStatementError);

if(ret == SQLITE_ERROR)
{
std::string errorMsg = "Running ```\n"+statements+"\n``` failed because of: `" + sqlite3_errmsg(_db);
Log::log() << errorMsg << std::endl;

throw std::runtime_error(errorMsg);
Log::log() << "Running ```\n"+statements +"\n``` failed because of: `" + sqlite3_errmsg(_db) << std::endl;
throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because of: `" + sqlite3_errmsg(_db));
}

if(ret == SQLITE_READONLY)
{
std::string errorMsg = "Running ```\n"+statements+"\n``` failed because the database is readonly...";
Log::log() << errorMsg << std::endl;
throw std::runtime_error(errorMsg);
Log::log() << "Running ```\n"+statements +"\n``` failed because the database is readonly..." << std::endl;
throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because the database is readonly...");
}
}

Expand Down Expand Up @@ -1570,7 +1590,8 @@ void DatabaseInterface::create()
else
Log::log() << "Opened internal sqlite database for creation at '" << dbFile() << "'." << std::endl;


dbStartUpPragmas();

transactionWriteBegin();
runStatements(_dbConstructionSql);
transactionWriteEnd();
Expand All @@ -1593,6 +1614,14 @@ void DatabaseInterface::load()
}
else
Log::log() << "Opened internal sqlite database for loading at '" << dbFile() << "'." << std::endl;

dbStartUpPragmas();
}

void DatabaseInterface::dbStartUpPragmas()
{
runStatements("pragma journal_mode = WAL;");
runStatements("pragma synchronous = normal;");
}

void DatabaseInterface::close()
Expand Down
5 changes: 4 additions & 1 deletion CommonData/databaseinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class DatabaseInterface
int dataSetGetRevision( int dataSetId);
int dataSetGetFilter( int dataSetId);
void dataSetInsertEmptyRow( int dataSetId, size_t row);
void dataSetCreateTable( DataSet * dataSet); ///< Drops the DataSet_? table of dataSet and recreates it in one go with rowNumber, the filter column and a _DBL + _INT column for each column in dataSet. Meant for importing fresh data, call it before any values are written to DataSet_?

void dataSetBatchedValuesUpdate(DataSet * data, std::vector<Column*> columns, std::function<void(float)> progressCallback = [](float){});
void dataSetBatchedValuesUpdate(DataSet * data, std::function<void(float)> progressCallback = [](float){});
Expand All @@ -109,7 +110,7 @@ class DatabaseInterface

//Columns & Data/Values
//Index stuff:
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown, bool alterTable=true); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnLastFreeIndex( int dataSetId);
void columnIndexIncrements( int dataSetId, int index); ///< If index already is in use that column and all after are incremented by 1
void columnIndexDecrements( int dataSetId, int index); ///< Indices bigger than index are decremented, assumption is that the previous one using it has been removed already
Expand Down Expand Up @@ -158,6 +159,7 @@ class DatabaseInterface
void transactionReadBegin(); ///< runs BEGIN DEFERRED and waits for sqlite to not be busy anymore if some other process is writing Tracks whether nested and only does BEGIN+COMMIT at lowest depth
void transactionReadEnd(); ///< runs COMMIT and ends the transaction. Tracks whether nested and only does BEGIN+COMMIT at lowest depth


private:
void _doubleTroubleBinder(sqlite3_stmt *stmt, int param, double dbl); ///< Needed to work around the lack of support for NAN, INF and NEG_INF in sqlite, converts those to string to make use of sqlite flexibility
double _doubleTroubleReader(sqlite3_stmt *stmt, int colI); ///< The reading counterpart to _doubleTroubleBinder to convert string representations of NAN, INF and NEG_INF back to double
Expand All @@ -168,6 +170,7 @@ class DatabaseInterface
void load(); ///< Loads a sqlite database from sessiondir (after loading a jaspfile)
void close(); ///< Closes the loaded database and disconnects
bool tableHasColumn(const std::string & tableName, const std::string & columnName);
void dbStartUpPragmas();

int _transactionWriteDepth = 0,
_transactionReadDepth = 0;
Expand Down
13 changes: 10 additions & 3 deletions CommonData/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void DataSet::dbDelete()

_dataSetID = -1;


db().transactionWriteEnd();
}

Expand Down Expand Up @@ -171,12 +172,12 @@ void DataSet::removeColumn(const std::string & name)
}
}

void DataSet::insertColumn(size_t index)
void DataSet::insertColumn(size_t index, bool alterDataSetTable)
{

assert(_dataSetID > 0);

Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index));
Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index, "", columnType::unknown, alterDataSetTable));

_columns.insert(_columns.begin()+index, newColumn);

Expand Down Expand Up @@ -382,18 +383,24 @@ void DataSet::setColumnCount(size_t colCount)
db().transactionWriteBegin();

int curCount = columns().size();

bool alterTableAfterwards = curCount == 0 && colCount > 0;

if(colCount > curCount)
for(size_t i=curCount; i<colCount; i++)
insertColumn(i);
insertColumn(i, !alterTableAfterwards);

else if(colCount < curCount)
for(size_t i=curCount-1; i>=colCount; i--)
removeColumn(i);


incRevision();

db().transactionWriteEnd();

if(alterTableAfterwards)
db().dataSetCreateTable(this);
}

void DataSet::setRowCount(size_t rowCount)
Expand Down
4 changes: 2 additions & 2 deletions CommonData/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class DataSet : public DataSetBaseNode
void beginBatchedToDB();
void endBatchedToDB(std::function<void(float)> progressCallback = [](float){}, Columns columns={});
void endBatchedToDB(Columns columns) { endBatchedToDB([](float){}, columns); }

void removeColumn( const std::string & name );
void removeColumn( size_t index );
void removeColumnById( size_t id );
void insertColumn( size_t index );
void insertColumn( size_t index, bool alterDataSetTable = true);
Column * newColumn( const std::string & name);
int getColumnIndex( const std::string & name ) const;
int columnIndex( const Column * col ) const;
Expand Down

0 comments on commit 8e00f8e

Please sign in to comment.