Skip to content

Commit

Permalink
add some pragmas to speed it all up
Browse files Browse the repository at this point in the history
Don't add each column separately; instead create the table from scratch, making sure that the filter column is still first
  • Loading branch information
JorisGoosen committed Sep 3, 2024
1 parent 0685f80 commit 26c89fc
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 27 deletions.
7 changes: 4 additions & 3 deletions CommonData/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void Column::dbDelete(bool cleanUpRest)
{
assert(_id != -1);

labelsClear();
labelsClear(false);
db().columnDelete(_id, cleanUpRest);

_id = -1;
Expand Down Expand Up @@ -649,13 +649,14 @@ void Column::_sortLabelsByOrder()
std::sort(_labels.begin(), _labels.end(), [](const Label * l, const Label * r) { return l->order() < r->order(); });
}

// Removes all labels of this column: first from the database (db().labelsClear), then from the
// in-memory containers _labels and _labelByIntsIdMap.
// doIncRevision lets the caller skip the revision bump at the end; Column::dbDelete() passes false.
void Column::labelsClear()
void Column::labelsClear(bool doIncRevision)
{
db().labelsClear(_id);
// NOTE(review): _labels stores Label pointers (see the comparator in _sortLabelsByOrder), so clear()
// only drops the pointers - confirm the Label objects themselves are owned/freed elsewhere.
_labels.clear();
_labelByIntsIdMap.clear();

incRevision(false);
// The revision is only bumped when the caller asked for it.
if(doIncRevision)
incRevision(false);
}

void Column::beginBatchedLabelsDB()
Expand Down
2 changes: 1 addition & 1 deletion CommonData/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Column : public DataSetBaseNode
void upgradeSetDoubleLabelsInInts(); ///< Used by upgrade 0.18.* -> 0.19
void upgradeExtractDoublesIntsFromLabels(); ///< Used by upgrade 0.18.* -> 0.19

void labelsClear();
void labelsClear(bool doIncRevision=true);
int labelsAdd( int display);
int labelsAdd( const std::string & display);
int labelsAdd( const std::string & display, const std::string & description, const Json::Value & originalValue);
Expand Down
63 changes: 46 additions & 17 deletions CommonData/databaseinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ int DatabaseInterface::dataSetInsert(const std::string & dataFilePath, long data

transactionWriteBegin();
int id = runStatementsId("INSERT INTO DataSets (dataFilePath, dataFileTimestamp, description, databaseJson, emptyValuesJson, dataFileSynch) VALUES (?, ?, ?, ?, ?, ?) RETURNING id;", prepare);
runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);");
runStatements("CREATE TABLE " + dataSetName(id) + " (rowNumber INTEGER PRIMARY KEY);"); // Can be overwritten through dataSetCreateTable
transactionWriteEnd();

return id;
Expand Down Expand Up @@ -183,6 +183,7 @@ void DatabaseInterface::filterDelete(int filterIndex)

if(dataSetId != -1)
runStatements("ALTER TABLE " + dataSetName(dataSetId) + " DROP COLUMN " + filterName(filterIndex) + ";");

runStatements("DELETE FROM Filters WHERE id = " + std::to_string(filterIndex) + ";");

transactionWriteEnd();
Expand Down Expand Up @@ -391,7 +392,7 @@ void DatabaseInterface::filterWrite(int filterIndex, const std::vector<bool> & v
transactionWriteEnd();
}

int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType)
int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string & name, columnType colType, bool alterTable)
{
JASPTIMER_SCOPE(DatabaseInterface::columnInsert);
transactionWriteBegin();
Expand Down Expand Up @@ -419,19 +420,38 @@ int DatabaseInterface::columnInsert(int dataSetId, int index, const std::string
Log::log() << "Inserting column failed!" << std::endl;
#endif

//Add a scalar and ordinal/nominal column to DataSet_# for the column
const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId);
const std::string addColumnFragment = " ADD " + columnBaseName(columnId);

runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;");
runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;");


if(alterTable) //If not then via dataSetCreateTable
{
//Add a scalar and ordinal/nominal column to DataSet_# for the column
const std::string alterDatasetPrefix = "ALTER TABLE " + dataSetName(dataSetId);
const std::string addColumnFragment = " ADD " + columnBaseName(columnId);

runStatements(alterDatasetPrefix + addColumnFragment + "_DBL REAL NULL;");
runStatements(alterDatasetPrefix + addColumnFragment + "_INT INT NULL;");
}

//The labels will be added separately later

transactionWriteEnd();
return columnId;
}

/// Replaces the DataSet_# table of `dataSet` with a freshly created one that contains, in this order:
/// rowNumber, the filter column and a `_DBL`/`_INT` pair for every column currently in the dataset.
/// Creating all columns in one CREATE TABLE avoids running an ALTER TABLE per column.
/// Any rows stored in the previous table are lost, so only use this before values are written.
/// \param dataSet the dataset whose table is rebuilt; its id, filter id and column ids are used to build the names.
/// \throws std::runtime_error (through runStatements) when sqlite reports an error.
void DatabaseInterface::dataSetCreateTable(DataSet * dataSet)
{
	const std::string tableName = dataSetName(dataSet->id());

	// Build the full CREATE statement up front so the transaction below stays as short as possible.
	std::stringstream createTable;
	createTable << "CREATE TABLE " << tableName << " (rowNumber INTEGER PRIMARY KEY, " << filterName(dataSet->filter()->id()) << " INT NOT NULL DEFAULT 1";

	for(Column * column : dataSet->columns())
	{
		const std::string baseName = columnBaseName(column->id());
		createTable << ", " << baseName << "_DBL REAL NULL, " << baseName << "_INT INT NULL";
	}

	createTable << ");";

	// Drop and create inside a single write transaction: if the CREATE fails the DROP is not committed,
	// so the dataset is never left without its table. The transaction helpers are nesting-aware.
	transactionWriteBegin();

	// IF EXISTS: dataSetInsert normally creates a minimal table already, but do not fail when it is absent.
	runStatements("DROP TABLE IF EXISTS " + tableName + ";");
	runStatements(createTable.str());

	transactionWriteEnd();
}

int DatabaseInterface::columnGetDataSetId(int columnId)
{
JASPTIMER_SCOPE(DatabaseInterface::columnGetDataSetId);
Expand Down Expand Up @@ -1438,19 +1458,19 @@ void DatabaseInterface::_runStatements(const std::string & statements, bindParam
}
while(remain > 1 && (ret == SQLITE_OK && ret != SQLITE_DONE));

const int maxLenStatementError = 200;
std::string shortStatements = statements.size() <= maxLenStatementError ? statements : statements.substr(0, maxLenStatementError);

if(ret == SQLITE_ERROR)
{
std::string errorMsg = "Running ```\n"+statements+"\n``` failed because of: `" + sqlite3_errmsg(_db);
Log::log() << errorMsg << std::endl;

throw std::runtime_error(errorMsg);
Log::log() << "Running ```\n"+statements +"\n``` failed because of: `" + sqlite3_errmsg(_db) << std::endl;
throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because of: `" + sqlite3_errmsg(_db));
}

if(ret == SQLITE_READONLY)
{
std::string errorMsg = "Running ```\n"+statements+"\n``` failed because the database is readonly...";
Log::log() << errorMsg << std::endl;
throw std::runtime_error(errorMsg);
Log::log() << "Running ```\n"+statements +"\n``` failed because the database is readonly..." << std::endl;
throw std::runtime_error( "Running ```\n"+shortStatements +"\n``` failed because the database is readonly...");
}
}

Expand Down Expand Up @@ -1570,7 +1590,8 @@ void DatabaseInterface::create()
else
Log::log() << "Opened internal sqlite database for creation at '" << dbFile() << "'." << std::endl;


dbStartUpPragmas();

transactionWriteBegin();
runStatements(_dbConstructionSql);
transactionWriteEnd();
Expand All @@ -1593,6 +1614,14 @@ void DatabaseInterface::load()
}
else
Log::log() << "Opened internal sqlite database for loading at '" << dbFile() << "'." << std::endl;

dbStartUpPragmas();
}

/// Runs the pragmas that are applied right after the sqlite database has been opened,
/// both when creating a new database (create()) and when loading an existing one (load()).
void DatabaseInterface::dbStartUpPragmas()
{
	// Executed one by one, in this order: journal mode first, synchronous level second.
	static const char * const startUpPragmas[] =
	{
		"pragma journal_mode = WAL;",
		"pragma synchronous = normal;"
	};

	for(const char * pragma : startUpPragmas)
		runStatements(pragma);
}

void DatabaseInterface::close()
Expand Down
5 changes: 4 additions & 1 deletion CommonData/databaseinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class DatabaseInterface
int dataSetGetRevision( int dataSetId);
int dataSetGetFilter( int dataSetId);
void dataSetInsertEmptyRow( int dataSetId, size_t row);
void dataSetCreateTable( DataSet * dataSet); ///< Drops the DataSet_? table (as created by dataSetInsert) and recreates it in one statement with rowNumber, the filter column and a _DBL/_INT pair per column. Meant for freshly imported data: rows already stored in the table are lost

void dataSetBatchedValuesUpdate(DataSet * data, std::vector<Column*> columns, std::function<void(float)> progressCallback = [](float){});
void dataSetBatchedValuesUpdate(DataSet * data, std::function<void(float)> progressCallback = [](float){});
Expand All @@ -109,7 +110,7 @@ class DatabaseInterface

//Columns & Data/Values
//Index stuff:
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnInsert( int dataSetId, int index = -1, const std::string & name = "", columnType colType = columnType::unknown, bool alterTable=true); ///< Insert a row into Columns and create the corresponding columns in DataSet_? Also makes sure the indices are correct
int columnLastFreeIndex( int dataSetId);
void columnIndexIncrements( int dataSetId, int index); ///< If index already is in use that column and all after are incremented by 1
void columnIndexDecrements( int dataSetId, int index); ///< Indices bigger than index are decremented, assumption is that the previous one using it has been removed already
Expand Down Expand Up @@ -158,6 +159,7 @@ class DatabaseInterface
void transactionReadBegin(); ///< runs BEGIN DEFERRED and waits for sqlite to not be busy anymore if some other process is writing Tracks whether nested and only does BEGIN+COMMIT at lowest depth
void transactionReadEnd(); ///< runs COMMIT and ends the transaction. Tracks whether nested and only does BEGIN+COMMIT at lowest depth


private:
void _doubleTroubleBinder(sqlite3_stmt *stmt, int param, double dbl); ///< Needed to work around the lack of support for NAN, INF and NEG_INF in sqlite, converts those to string to make use of sqlite flexibility
double _doubleTroubleReader(sqlite3_stmt *stmt, int colI); ///< The reading counterpart to _doubleTroubleBinder to convert string representations of NAN, INF and NEG_INF back to double
Expand All @@ -168,6 +170,7 @@ class DatabaseInterface
void load(); ///< Loads a sqlite database from sessiondir (after loading a jaspfile)
void close(); ///< Closes the loaded database and disconnects
bool tableHasColumn(const std::string & tableName, const std::string & columnName);
void dbStartUpPragmas();

int _transactionWriteDepth = 0,
_transactionReadDepth = 0;
Expand Down
13 changes: 10 additions & 3 deletions CommonData/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void DataSet::dbDelete()

_dataSetID = -1;


db().transactionWriteEnd();
}

Expand Down Expand Up @@ -171,12 +172,12 @@ void DataSet::removeColumn(const std::string & name)
}
}

void DataSet::insertColumn(size_t index)
void DataSet::insertColumn(size_t index, bool alterDataSetTable)
{

assert(_dataSetID > 0);

Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index));
Column * newColumn = new Column(this, db().columnInsert(_dataSetID, index, "", columnType::unknown, alterDataSetTable));

_columns.insert(_columns.begin()+index, newColumn);

Expand Down Expand Up @@ -382,18 +383,24 @@ void DataSet::setColumnCount(size_t colCount)
db().transactionWriteBegin();

int curCount = columns().size();

bool alterTableAfterwards = curCount == 0 && colCount > 0;

if(colCount > curCount)
for(size_t i=curCount; i<colCount; i++)
insertColumn(i);
insertColumn(i, !alterTableAfterwards);

else if(colCount < curCount)
for(size_t i=curCount-1; i>=colCount; i--)
removeColumn(i);


incRevision();

db().transactionWriteEnd();

if(alterTableAfterwards)
db().dataSetCreateTable(this);
}

void DataSet::setRowCount(size_t rowCount)
Expand Down
4 changes: 2 additions & 2 deletions CommonData/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class DataSet : public DataSetBaseNode
void beginBatchedToDB();
void endBatchedToDB(std::function<void(float)> progressCallback = [](float){}, Columns columns={});
void endBatchedToDB(Columns columns) { endBatchedToDB([](float){}, columns); }

void removeColumn( const std::string & name );
void removeColumn( size_t index );
void removeColumnById( size_t id );
void insertColumn( size_t index );
void insertColumn( size_t index, bool alterDataSetTable = true);
Column * newColumn( const std::string & name);
int getColumnIndex( const std::string & name ) const;
int columnIndex( const Column * col ) const;
Expand Down

0 comments on commit 26c89fc

Please sign in to comment.