Skip to content

Commit

Permalink
feat: implement importing excel (.xls .xlsx) files (#5638)
Browse files Browse the repository at this point in the history
* implement importing excel files

* update ubuntu build bot

* work on Windows

* code style formatting and cleanup

* add to installer and file type support

* showing file type filter as a list

* Use conan to load freexl

* add some missing supported filetypes to macos manifest thing

* use the jasp-stats fork of freexl, which includes shun's fixes

* address reviews suggestions

* fix build on Linux

* some cleanup and comments

* replace newlines in excel cell value

---------

Co-authored-by: boutinb <b.boutin@uva.nl>
Co-authored-by: Joris Goosen <joris@jorisgoosen.nl>
  • Loading branch information
3 people authored Sep 12, 2024
1 parent 3102e9e commit 90a9752
Show file tree
Hide file tree
Showing 26 changed files with 463 additions and 126 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ jobs:
sudo apt install libmpfr-dev #required by rmpfr packages
sudo apt install libglpk-dev #required by igraph packages
sudo apt install jags
sudo apt install libminizip-dev # required by freexl
git clone https://github.com/jasp-stats/freexl.git
cd freexl && ./configure && make && sudo make install
- name: Install boost
uses: MarkusJx/install-boost@v2.4.4
Expand Down
2 changes: 1 addition & 1 deletion Common/utilenums.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define UTILENUMS_H
#include "enumutilities.h"

DECLARE_ENUM(FileTypeBase, jasp = 0, html, csv, txt, tsv, sav, zsav, ods, pdf, sas7bdat, sas7bcat, por, xpt, dta, database, empty, unknown );
DECLARE_ENUM(FileTypeBase, jasp = 0, html, csv, txt, tsv, sav, zsav, ods, xls, xlsx, pdf, sas7bdat, sas7bcat, por, xpt, dta, database, empty, unknown );

//const QStringList Database::dbTypes() const should be updated if DbType is changed.
DECLARE_ENUM(DbType, NOTCHOSEN, QDB2, /*QIBASE,*/ QMYSQL, QOCI, QODBC, QPSQL, QSQLITE /*, QSQLITE2, QTDS*/ );
Expand Down
6 changes: 5 additions & 1 deletion Desktop/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ qt_add_executable(
${BUNDLE_RESOURCES}
$<$<PLATFORM_ID:Windows>:${CMAKE_CURRENT_LIST_DIR}/icon.rc>
$<$<PLATFORM_ID:Darwin>:${_R_Framework}>
$<$<PLATFORM_ID:Windows>:${CMAKE_SOURCE_DIR}/Desktop/JASP.exe.manifest>
$<$<PLATFORM_ID:Windows>:${CMAKE_SOURCE_DIR}/Desktop/JASP.exe.manifest>
)

set(
Expand Down Expand Up @@ -105,6 +105,7 @@ target_include_directories(
$<$<BOOL:${FLATPAK_USED}>:/app/include/QtCore5Compat>
$<$<BOOL:${FLATPAK_USED}>:/app/include/QtWebEngineQuick>
$<$<BOOL:${FLATPAK_USED}>:/app/include/QtWebEngineCore>
${LIBFREEXL_INCLUDE_DIRS}
)

target_link_libraries(
Expand Down Expand Up @@ -145,6 +146,9 @@ target_link_libraries(
Iconv::Iconv
OpenSSL::SSL
OpenSSL::Crypto
# FreeXL
${LIBFREEXL_LIBRARIES}
$<$<NOT:$<BOOL:${LINUX_LOCAL_BUILD}>>:freexl::freexl>
# ReadStat -----------------------------------
${LIBREADSTAT_LIBRARIES}
# MinGW's ReadStat
Expand Down
4 changes: 4 additions & 0 deletions Desktop/data/datasetloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "importers/jaspimporterold.h"
#include "importers/odsimporter.h"
#include "importers/readstatimporter.h"
#include "importers/excelimporter.h"


#include <QFileInfo>

Expand All @@ -51,6 +53,8 @@ Importer* DataSetLoader::getImporter(const string & locator, const string &ext)
boost::iequals(ext,".txt") ||
boost::iequals(ext,".tsv")) return new CSVImporter();
if( boost::iequals(ext,".ods")) return new ODSImporter();
if( boost::iequals(ext,".xls") ||
boost::iequals(ext,".xlsx")) return new ExcelImporter();
if( ReadStatImporter::extSupported(ext)) return new ReadStatImporter(ext);

return nullptr; //If NULL then JASP will try to load it as a .jasp file (if the extension matches)
Expand Down
4 changes: 3 additions & 1 deletion Desktop/data/fileevent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ QString FileEvent::getProgressMsg() const
case Utils::FileType::txt:
case Utils::FileType::tsv:
case Utils::FileType::ods: return tr("Importing Data from %1").arg(FileTypeBaseToQString(_type).toUpper());
case Utils::FileType::sav:
case Utils::FileType::xls:
case Utils::FileType::xlsx: return tr("Importing Excel File");
case Utils::FileType::sav:
case Utils::FileType::zsav:
case Utils::FileType::por: return tr("Importing SPSS File");
case Utils::FileType::xpt:
Expand Down
2 changes: 1 addition & 1 deletion Desktop/data/importers/csvimporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ ImportDataSet* CSVImporter::loadFile(const string &locator, std::function<void(i
ImportDataSet* result = new ImportDataSet(this);
stringvec colNames;
CSV csv(locator);
csv.open();
csv.open();

csv.readLine(colNames);
vector<CSVImportColumn *> importColumns;
Expand Down
119 changes: 119 additions & 0 deletions Desktop/data/importers/excel/excel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
//
// Copyright (C) 2013-2024 University of Amsterdam
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#include "excel.h"
#include "utilities/qutils.h"
#include <stringutils.h>

#include <QFileInfo>
#include <QDebug>

// Remembers the location of the Excel file to import; no file is touched yet.
//
// Every member is given a defined "nothing loaded" value here so that close()
// (which tests _handle) and countCols() (which returns _numCols) are safe to
// call even when openWorkbook() / getWorksheetDimensions() were never reached.
// The initialiser order follows the member declaration order in excel.h.
Excel::Excel(const std::string &locator)
	: _fileSize(0), _filePosition(0), _numCols(0), _path(locator), _handle(nullptr)
{
}

// Sanity-checks the file at _path before any parsing is attempted and caches
// its size in _fileSize.
//
// Throws std::runtime_error("Could not access file") when the file does not
// exist (or its size cannot be represented), and
// std::runtime_error("File is empty") when it exists but has zero bytes.
void Excel::open()
{
	QFileInfo fi(tq(_path));

	// QFileInfo::size() yields 0 for a file that does not exist, so a missing
	// file has to be detected explicitly; otherwise it would be reported as empty.
	if (!fi.exists())
		throw std::runtime_error("Could not access file");

	_fileSize = fi.size();

	// Kept as a safety net: the qint64 size is stored in a long and may not fit.
	if (_fileSize < 0)
		throw std::runtime_error("Could not access file");

	if (_fileSize == 0)
		throw std::runtime_error("File is empty");
}

void Excel::openWorkbook()
{
QString xlsFilePath = tq(_path);
const char* utf8Path = _path.c_str(); //But it would be better to just use _path.c_str() directly if you need it. It is in utf8 in any case.
QString extension = QFileInfo(xlsFilePath).suffix().toLower();

int ret = 0;
if (extension == "xls")
ret = freexl_open(utf8Path, &_handle);
else if (extension == "xlsx")
ret = freexl_open_xlsx(utf8Path, &_handle);
else
throw std::runtime_error("Unsupported file format: " + fq(extension));

if(ret != FREEXL_OK)
throw std::runtime_error("Unexpected error while loading excel file, error code: " + std::to_string(ret));
}

// Makes the first worksheet (index 0) of the opened workbook the active one;
// only that sheet is imported.
//
// Throws std::runtime_error when FreeXL does not return FREEXL_OK.
void Excel::selectActiveWorksheet()
{
	const int status = freexl_select_active_worksheet(_handle, 0);

	if (status == FREEXL_OK)
		return;

	throw std::runtime_error("Could not select active worksheet,\n error code: " + std::to_string(status));
}

// Asks FreeXL for the dimensions of the active worksheet.
//
// rows / cols are output parameters filled in by freexl_worksheet_dimensions.
// On success the column count is also cached in _numCols for countCols().
//
// Throws std::runtime_error when FreeXL does not return FREEXL_OK.
void Excel::getWorksheetDimensions(uint32_t &rows, uint16_t &cols)
{
	const int status = freexl_worksheet_dimensions(_handle, &rows, &cols);

	if (FREEXL_OK != status)
		throw std::runtime_error("Could not read worksheet dimensions, error code: " + std::to_string(status));

	// remember the width of the sheet that was just inspected
	_numCols = cols;
}

// Reads one cell of the active worksheet and returns its content as text.
//
// row, col   zero-based position passed straight to freexl_get_cell_value.
// cellValue  output: the textual representation of the cell.
//            - text / date / time cells: the text with "\n" replaced by "_"
//            - integer and double cells: std::to_string of the value
//            - empty or unknown cells:   ""
//            - FreeXL failure:           "ERROR <code>"
void Excel::getCellValue(uint32_t &row, uint16_t &col, std::string &cellValue)
{
	FreeXL_CellValue cell;
	int ret = freexl_get_cell_value(_handle, row, col, &cell);

	if (ret != FREEXL_OK)
	{
		// 'cell' is not guaranteed to have been filled in on failure, so it must not
		// be inspected; returning here also keeps the error text from being overwritten.
		cellValue = "ERROR " + std::to_string(ret);
		return;
	}

	switch (cell.type)
	{
	case FREEXL_CELL_TEXT:
	case FREEXL_CELL_SST_TEXT:
	case FREEXL_CELL_DATE:		// Dates and times are kept as text until date types are supported.
	case FREEXL_CELL_DATETIME:
	case FREEXL_CELL_TIME:
		// Guard against a null pointer: constructing a std::string from nullptr is undefined.
		cellValue = cell.value.text_value ? cell.value.text_value : "";
		cellValue = stringUtils::replaceBy(cellValue, "\n", "_");
		break;
	case FREEXL_CELL_INT:
		cellValue = std::to_string(cell.value.int_value);
		break;
	case FREEXL_CELL_DOUBLE:
		// NOTE(review): std::to_string formats with a fixed six decimals, which drops
		// precision for very small or very precise values - consider a round-trip format.
		cellValue = std::to_string(cell.value.double_value);
		break;
	case FREEXL_CELL_NULL:
	default:
		cellValue = "";
		break;
	}
}

// Returns the column count cached by the last getWorksheetDimensions() call.
uint16_t Excel::countCols()
{
	const uint16_t cachedColumnCount = _numCols;
	return cachedColumnCount;
}

// Releases the FreeXL workbook handle, if there is one, and resets _handle
// so that a repeated call does nothing.
void Excel::close()
{
	if (!_handle)
		return;

	freexl_close(_handle);
	_handle = nullptr;
}
53 changes: 53 additions & 0 deletions Desktop/data/importers/excel/excel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//
// Copyright (C) 2013-2024 University of Amsterdam
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#ifndef EXCEL_H
#define EXCEL_H

#include <string>
#include <stdint.h>

#include <freexl.h>

// Thin wrapper around a FreeXL workbook handle, used to read the first
// worksheet of an .xls / .xlsx file cell by cell.
//
// Call order as implemented in excel.cpp: open() checks the file,
// openWorkbook() creates the FreeXL handle, selectActiveWorksheet() and
// getWorksheetDimensions() prepare the sheet, getCellValue() reads cells and
// close() releases the handle.
class Excel
{
public:
	Excel(const std::string &path);

	// Releases the FreeXL handle if close() was not called explicitly, e.g.
	// because an exception unwound the importer; close() is a no-op when no
	// handle is held.
	~Excel() { close(); }

	// The object owns a single FreeXL handle; copying would close it twice.
	Excel(const Excel &)			= delete;
	Excel &operator=(const Excel &)	= delete;

	void open();
	void close();

	void openWorkbook();
	void selectActiveWorksheet();
	void getWorksheetDimensions(uint32_t &rows, uint16_t &cols);
	void getCellValue(uint32_t &row, uint16_t &col, std::string &cellValue);

	uint16_t countCols();

private:

	// In-class initialisers give every member a defined value even before
	// the corresponding open/read call has run.
	long		_fileSize		= 0;
	long		_filePosition	= 0;
	uint16_t	_numCols		= 0;

private:

	std::string	_path;
	const void	*_handle		= nullptr;	// opaque FreeXL workbook handle; nullptr when nothing is open
};

#endif // EXCEL_H
31 changes: 31 additions & 0 deletions Desktop/data/importers/excel/excelimportcolumn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "excelimportcolumn.h"
#include "timers.h"

// Creates an empty column called 'name' that belongs to 'importDataSet'.
ExcelImportColumn::ExcelImportColumn(ImportDataSet* importDataSet, std::string name)
	: ImportColumn(importDataSet, name)
{
}

// Creates an empty column called 'name' that belongs to 'importDataSet' and
// pre-allocates room for 'reserve' values.
//
// 'reserve' is only a capacity hint. Zero or negative values are ignored: a
// negative long would convert to a huge unsigned size and make
// std::vector::reserve throw std::length_error.
ExcelImportColumn::ExcelImportColumn(ImportDataSet *importDataSet, std::string name, long reserve)
	: ImportColumn(importDataSet, name)
{
	if (reserve > 0)
		_data.reserve(static_cast<size_t>(reserve));
}

// Destructor. Its only statement is the JASPTIMER_SCOPE macro, labelled with
// this destructor's name (presumably it times this scope when timers are
// enabled - confirm against timers.h).
ExcelImportColumn::~ExcelImportColumn()
{
JASPTIMER_SCOPE(ExcelImportColumn::~ExcelImportColumn());
}

// Number of values added to this column so far.
size_t ExcelImportColumn::size() const
{
	const size_t valueCount = _data.size();
	return valueCount;
}

// Appends a copy of 'value' to the end of the column.
void ExcelImportColumn::addValue(const std::string &value)
{
	_data.emplace_back(value);
}

// Read-only access to the stored values, in insertion order.
const std::vector<std::string> &ExcelImportColumn::getValues() const
{
	const std::vector<std::string> &storedValues = _data;
	return storedValues;
}
26 changes: 26 additions & 0 deletions Desktop/data/importers/excel/excelimportcolumn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef EXCELIMPORTCOLUMN_H
#define EXCELIMPORTCOLUMN_H

#include "data/importers/importcolumn.h"


// Column of an imported Excel worksheet. Every cell is kept as a string in
// _data, in the order the values were added.
class ExcelImportColumn : public ImportColumn
{
public:
	// Construction / destruction ------------------------------------------
	ExcelImportColumn(ImportDataSet* importDataSet, std::string name);
	ExcelImportColumn(ImportDataSet* importDataSet, std::string name, long reserve);	// 'reserve' pre-allocates room for that many values
	~ExcelImportColumn() override;

	// ImportColumn interface ----------------------------------------------
	size_t				size()					const override;
	const stringvec &	allValuesAsStrings()	const override	{ return _data; }

	// Excel specific ------------------------------------------------------
	void				addValue(const std::string &value);		// appends one cell value
	const stringvec &	getValues()				const;			// same storage as allValuesAsStrings()

private:
	stringvec _data;	// one entry per added value
};


#endif // EXCELIMPORTCOLUMN_H
Loading

0 comments on commit 90a9752

Please sign in to comment.